Skip to content

Commit b3d3291

Browse files
committed
docs: 📝 Docs for KRICT_DATA
1 parent 2ac013d commit b3d3291

File tree

4 files changed

+18
-217
lines changed

4 files changed

+18
-217
lines changed

src/models/KRICT_DATA/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# A Public Database of Thermoelectric Materials and System-Identified Material Representation for Data-Driven Discovery
2+
3+
Source: https://github.com/KRICT-DATA/SIMD
4+
5+
License: CC BY 4.0, https://creativecommons.org/licenses/by/4.0/
6+
7+
This database is derived from the database published alongside [Na, G. S., & Chang, H. (2022). A public database of thermoelectric materials and system-identified material representation for data-driven discovery. npj Computational Materials, 8(1), 214.](https://www.nature.com/articles/s41524-022-00897-2).
8+
9+
The ESTM dataset (in "estm.xlsx") covers 880 unique thermoelectric materials and provides five experimentally measured thermoelectric properties: Seebeck coefficient, electrical conductivity, thermal conductivity, power factor, and figure of merit (ZT). There are a total of 5205 rows in the dataset.
10+
11+
Changes include:
12+
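- Standardized the DOI format in the reference column, so that every DOI starts with `10.` instead of an `http://` or `https://` URL prefix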
13+
- Introduced range constraints on specific columns, such as temperature (10 to 1275 K) and thermal conductivity (0.07 to 77.16 W/mK)
Lines changed: 4 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,27 @@
11
// XLSXInterpreter does not work, see https://github.com/jvalue/jayvee/issues/603
22

3-
/*
4-
These datasets are based on the paper cited as "Na, G. S., & Chang, H. (2022). A public database of thermoelectric materials and system-identified material representation for data-driven discovery. npj Computational Materials, 8(1), 214.".
5-
The ESTM dataset covers 880 unique thermoelectric materials and provides five experimentally measured thermoelectric properties: Seebeck coefficient, electrical conductivity, thermal conductivity, power factor, and figure of merit (ZT)".
6-
In this paper, a machine learning approach is devised through which the ZT values for different materials from unexplored material groups were predicted and R2-score from 0.13 to 0.71 in an extrapolation problem.
7-
8-
Coming to the data engiineering pipeline, the earlier version had 5 sheets, but we removed the "results_extrapol_0.xlsx", "results_extrapol_1.xlsx" & "results_extrapol_2.xlsx", because they were the truncated versions of
9-
"preds_sxgb.xlsx" and no new data. So, we have kept only "estm.xlsx" & "preds_sxgb.xlsx" & removed the other redundant datasets. There are a total of 5205 rows in both the datasets, before and after the pipeline is executed.
10-
Other changes that we made were changing the datatype of various columns. Previously all of them were text, whereas, we changed them to appropriate datatypes, as we saw fit. We have also introduced constraints on specific columns like
11-
temperature & Thermal conductivity, where we mentioned the range of values that the columns can allow.
12-
13-
We have also used Transform blocks on the Reference column to standardize the DOI URLs, so that all of them start with 10.xx.xxx instead of http://xxx or https://xxxx.
14-
15-
All the changes have been discussed in details just before the part of the code that causes it.
16-
*/
17-
18-
//In the paper the range of the temperature is mentioned to be between 10 & 1275 kelvin. So, we have kept that as the allowed range of the temperature.(refer to page 2)
3+
// The paper reports temperatures between 10 and 1275 kelvin, see page 2
194
constraint TemperatureRange oftype RangeConstraint {
205
lowerBound: 10.0;
216
upperBound: 1275.0;
227
}
238
valuetype Temperature oftype decimal {
24-
constraints:
25-
[
9+
constraints: [
2610
TemperatureRange
2711
];
2812
}
2913

30-
//In the paper the range of the Thermal conductivity is mentioned to be between 0.07 & 77.16 W/mK and that is the allowed range for our datatype. (refer to page 2)
14+
// The paper reports thermal conductivity between 0.07 and 77.16 W/mK, see page 2
3115
constraint ThermalConductivityRange oftype RangeConstraint {
3216
lowerBound: 0.07;
3317
upperBound: 77.16;
3418
}
3519
valuetype ThermalConductivity oftype decimal {
36-
constraints:
37-
[
20+
constraints: [
3821
ThermalConductivityRange
3922
];
4023
}
4124

42-
4325
pipeline ThermoElectricMaterialsPipeline {
4426

4527
ThermoElectricMaterialsESTMExtractor
@@ -49,32 +31,17 @@ pipeline ThermoElectricMaterialsPipeline {
4931
-> DOIStandardizerBlock
5032
-> ThermoElectricMaterialsESTMDatabaseLoader;
5133

52-
ThermoElectricMaterialsPredsSxgbExtractor
53-
-> ThermoElectricMaterialsPredsSxgbXLSXInterpreter
54-
-> ThermoElectricMaterialsPredsSxgbSheetPicker
55-
-> ThermoElectricMaterialsPredsSxgbTableInterpreter
56-
-> ThermoElectricMaterialsPredsSxgbDatabaseLoader;
57-
5834
block ThermoElectricMaterialsESTMExtractor oftype HttpExtractor {
5935
url: "https://github.com/KRICT-DATA/SIMD/raw/main/dataset/estm.xlsx";
6036
}
6137

62-
block ThermoElectricMaterialsPredsSxgbExtractor oftype HttpExtractor {
63-
url: "https://github.com/KRICT-DATA/SIMD/raw/main/results/preds_sxgb.xlsx";
64-
}
65-
6638
block ThermoElectricMaterialsESTMXLSXInterpreter oftype XLSXInterpreter { }
6739

68-
block ThermoElectricMaterialsPredsSxgbXLSXInterpreter oftype XLSXInterpreter { }
6940

7041
block ThermoElectricMaterialsESTMSheetPicker oftype SheetPicker {
7142
sheetName: "Sheet1";
7243
}
7344

74-
block ThermoElectricMaterialsPredsSxgbSheetPicker oftype SheetPicker {
75-
sheetName: "Sheet1";
76-
}
77-
7845
block DOIStandardizerBlock oftype DOIStandardizer {
7946
doiColumn: 'reference';
8047
}
@@ -93,24 +60,8 @@ pipeline ThermoElectricMaterialsPipeline {
9360
];
9461
}
9562

96-
block ThermoElectricMaterialsPredsSxgbTableInterpreter oftype TableInterpreter {
97-
header: false;
98-
columns: [
99-
"Formula" oftype text,
100-
"Temperature" oftype Temperature,
101-
"Original_ZT" oftype text,
102-
"Predicted_ZT" oftype decimal,
103-
"Difference_in_Predictions" oftype decimal
104-
];
105-
}
106-
10763
block ThermoElectricMaterialsESTMDatabaseLoader oftype SQLiteLoader {
10864
table: "ESTM";
10965
file: "./ThermoElectricMaterialsDatabase.sqlite";
11066
}
111-
112-
block ThermoElectricMaterialsPredsSxgbDatabaseLoader oftype SQLiteLoader {
113-
table: "PredsSxgb";
114-
file: "./ThermoElectricMaterialsDatabase.sqlite";
115-
}
11667
}

src/models/KRICT_DATA/thermoelectricMaterialsModel.jv

Lines changed: 0 additions & 163 deletions
This file was deleted.

src/shared/transforms.jv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ publish transform TakeUntilComma {
3838
textWithoutComma: textWithComma replace /,.*$/ with "";
3939
}
4040

41-
// parse a decimal
41+
// Parse text as a decimal
4242
publish transform ParseDecimal {
4343
from decText oftype text;
4444
to dec oftype decimal;

0 commit comments

Comments
 (0)