Skip to content

Commit ae83c32

Browse files
committed
Update scala helpers
1 parent 515b58e commit ae83c32

File tree

2 files changed

+29
-17
lines changed

2 files changed

+29
-17
lines changed

scala_helpers/MeasureDataQuality.scala

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import base.DefaultConfiguration
99
import org.deidentifier.arx.AttributeType.{Hierarchy, listMicroAggregationFunctions}
1010
import org.deidentifier.arx.aggregates.AggregateFunction
1111
import org.deidentifier.arx.aggregates.AggregateFunction.AggregateFunctionBuilder
12+
import org.deidentifier.arx.metric.Metric
1213
//import org.deidentifier.arx.aggregates.AggregateFunction.AggregateFunctionBuilder.*
1314
import org.deidentifier.arx.aggregates.HierarchyBuilderIntervalBased
1415
import org.deidentifier.arx.criteria.KAnonymity
@@ -24,6 +25,10 @@ import postprocessor.ResultPrinter.{printHandle, printHandleTop, printResult}
2425

2526
object MeasureDataQuality extends App{
2627

28+
// file to save the distorted dataset
29+
// produced by running K-anonymity on it
30+
val dataFileKAnonymityDist: File = new File("/home/alex/qi3/drl_anonymity/src/examples/q_learn_distorted_sets/kanonymity_distorted.csv")
31+
2732
def buildSalaryHierarchy: HierarchyBuilderIntervalBased[lang.Double] = {
2833

2934
val salaryHierarchy: HierarchyBuilderIntervalBased[lang.Double] = HierarchyBuilderIntervalBased.create(DataType.DECIMAL)
@@ -57,7 +62,7 @@ object MeasureDataQuality extends App{
5762
System.out.println(s"Number of rows ${dataOrg.getHandle.getNumRows}")
5863
System.out.println(s"Number of cols ${dataOrg.getHandle.getNumColumns}")
5964

60-
printHandleTop(handle = dataOrg.getHandle, n = 5)
65+
//printHandleTop(handle = dataOrg.getHandle, n = 5)
6166
System.out.println("Done...")
6267

6368
(dataOrg, dataDist)
@@ -72,8 +77,8 @@ object MeasureDataQuality extends App{
7277

7378
val summaryStatsDist = dataHandleDist.getStatistics().getSummaryStatistics(true)
7479
val summaryStatsOrg = dataHandleOrg.getStatistics().getSummaryStatistics(true)
75-
// getEquivalenceClassStatistics(); //getEquivalenceClassStatistics();
7680

81+
println(dataHandleDist.getStatistics().getEquivalenceClassStatistics)
7782
for((key, value) <- summaryStatsDist){
7883
println(s"Column: ${key}")
7984
println("-----------------------Distorted/Original")
@@ -93,7 +98,7 @@ object MeasureDataQuality extends App{
9398
val dataFile: File = new File("/home/alex/qi3/drl_anonymity/data/hierarchies/normalized_salary_mocksubjects.csv")
9499
val data: Data = Data.create(dataFile, Charset.defaultCharset, ',')
95100

96-
printHandleTop(handle = data.getHandle, n = 5)
101+
//printHandleTop(handle = data.getHandle, n = 5)
97102

98103
// set the attribute types if AttributeType.IDENTIFYING_ATTRIBUTE
99104
// then the attribute will be removed
@@ -109,47 +114,54 @@ object MeasureDataQuality extends App{
109114
// keep the diagnosis as an insensitive attribute
110115
data.getDefinition().setAttributeType("diagnosis", AttributeType.INSENSITIVE_ATTRIBUTE)
111116

112-
// quasi-sensitive attriutes we set the
117+
// for the quasi-identifying attributes we set the
113118
// hierarchies
114119
// the ethnicity hierarchy file
115120
val ethnicityHierarchyFile: File = new File("/home/alex/qi3/drl_anonymity/data/hierarchies/ethnicity_hierarchy.csv")
116121
data.getDefinition().setAttributeType("ethnicity", Hierarchy.create(ethnicityHierarchyFile,
117-
StandardCharsets.UTF_8, ';'))/*AttributeType.QUASI_IDENTIFYING_ATTRIBUTE)*/
122+
StandardCharsets.UTF_8, ';'))
118123

119124
// the salary hierarchy
120-
//val salaryHierarchyFile: File = new File("/home/alex/qi3/drl_anonymity/data/hierarchies/salary_hierarchy.csv")
121125
data.getDefinition().setAttributeType("salary", buildSalaryHierarchy) //AttributeType.QUASI_IDENTIFYING_ATTRIBUTE)
122126

123-
124-
// create the ethnicity hierarchy
125-
//val ethnicityHierarchy = Hierarchy.create(ethnicityHierarchyFile,
126-
// Charset.defaultCharset, ',')
127-
128127
// create the hierarchies for the ethnicity and
129128
// salary
130129
// Create an instance of the anonymizer
131130
val anonymizer = new ARXAnonymizer
132131
val config = ARXConfiguration.create
133132
config.addPrivacyModel(new KAnonymity(5))
134133
config.setSuppressionLimit(0.02d)
135-
134+
config.setQualityModel(Metric.createEntropyMetric())
136135

137136
// anonymize the data using K-anonimity
138137
val result = anonymizer.anonymize(data, config)
139138

139+
val optimum = result.getGlobalOptimum
140+
140141
// Print info
141142
printResult(result, data)
142143

144+
// NOTE(review): unclear whether computing these statistics forks a new thread - TODO confirm
145+
System.out.println(" - Statistics")
146+
System.out.println(result.getOutput(result.getGlobalOptimum, false).getStatistics.getEquivalenceClassStatistics)
147+
148+
// save the dataset to disk
149+
//result.getOutput(optimum).save(dataFileKAnonymityDist, ',')
150+
143151
// Process results
144-
System.out.println(" - Transformed data:")
145-
printHandle(handle = result.getOutput(false))
152+
//System.out.println(" - Transformed data:")
153+
//printHandle(handle = result.getOutput(false))
146154
System.out.println("Done!")
147155

148156
}
149157

150158
// execute Experiment 1
151-
//experiment1
159+
println("=================================")
160+
println("Running Experiment 1")
161+
experiment1
152162

163+
println("=================================")
164+
println("Running K-anonymity")
153165
//exploreHierarchy
154166
// run K-anonymity
155167
runKAnonimity

scala_helpers/ResultPrinter.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ object ResultPrinter {
2828
System.out.println(" - No solution found!")
2929
return
3030
}
31+
3132
// Initialize
3233
val identifiers = new Array[StringBuffer](qis.size)
3334
val generalizations = new Array[StringBuffer](qis.size)
@@ -59,8 +60,7 @@ object ResultPrinter {
5960
for (i <- 0 until qis.size) {
6061
System.out.println(" * " + identifiers(i) + ": " + generalizations(i))
6162
}
62-
System.out.println(" - Statistics")
63-
System.out.println(result.getOutput(result.getGlobalOptimum, false).getStatistics.getEquivalenceClassStatistics)
63+
6464
}
6565

6666
def printHandle(handle: DataHandle): Unit = {

0 commit comments

Comments
 (0)