@@ -9,6 +9,7 @@ import base.DefaultConfiguration
99import org .deidentifier .arx .AttributeType .{Hierarchy , listMicroAggregationFunctions }
1010import org .deidentifier .arx .aggregates .AggregateFunction
1111import org .deidentifier .arx .aggregates .AggregateFunction .AggregateFunctionBuilder
12+ import org .deidentifier .arx .metric .Metric
1213// import org.deidentifier.arx.aggregates.AggregateFunction.AggregateFunctionBuilder.*
1314import org .deidentifier .arx .aggregates .HierarchyBuilderIntervalBased
1415import org .deidentifier .arx .criteria .KAnonymity
@@ -24,6 +25,10 @@ import postprocessor.ResultPrinter.{printHandle, printHandleTop, printResult}
2425
2526object MeasureDataQuality extends App {
2627
28+ // file in which to save the distorted dataset
29+ // produced by running K-anonymity on the original data
30+ val dataFileKAnonymityDist : File = new File (" /home/alex/qi3/drl_anonymity/src/examples/q_learn_distorted_sets/kanonymity_distorted.csv" )
31+
2732 def buildSalaryHierarchy : HierarchyBuilderIntervalBased [lang.Double ] = {
2833
2934 val salaryHierarchy : HierarchyBuilderIntervalBased [lang.Double ] = HierarchyBuilderIntervalBased .create(DataType .DECIMAL )
@@ -57,7 +62,7 @@ object MeasureDataQuality extends App{
5762 System .out.println(s " Number of rows ${dataOrg.getHandle.getNumRows}" )
5863 System .out.println(s " Number of cols ${dataOrg.getHandle.getNumColumns}" )
5964
60- printHandleTop(handle = dataOrg.getHandle, n = 5 )
65+ // printHandleTop(handle = dataOrg.getHandle, n = 5)
6166 System .out.println(" Done..." )
6267
6368 (dataOrg, dataDist)
@@ -72,8 +77,8 @@ object MeasureDataQuality extends App{
7277
7378 val summaryStatsDist = dataHandleDist.getStatistics().getSummaryStatistics(true )
7479 val summaryStatsOrg = dataHandleOrg.getStatistics().getSummaryStatistics(true )
75- // getEquivalenceClassStatistics(); //getEquivalenceClassStatistics();
7680
81+ println(dataHandleDist.getStatistics().getEquivalenceClassStatistics)
7782 for ((key, value) <- summaryStatsDist){
7883 println(s " Column: ${key}" )
7984 println(" -----------------------Distorted/Original" )
@@ -93,7 +98,7 @@ object MeasureDataQuality extends App{
9398 val dataFile : File = new File (" /home/alex/qi3/drl_anonymity/data/hierarchies/normalized_salary_mocksubjects.csv" )
9499 val data : Data = Data .create(dataFile, Charset .defaultCharset, ',' )
95100
96- printHandleTop(handle = data.getHandle, n = 5 )
101+ // printHandleTop(handle = data.getHandle, n = 5)
97102
98103 // set the attribute types if AttributeType.IDENTIFYING_ATTRIBUTE
99104 // then the attribute will be removed
@@ -109,47 +114,54 @@ object MeasureDataQuality extends App{
109114 // keep the diagnosis as an insensitive attribute
110115 data.getDefinition().setAttributeType(" diagnosis" , AttributeType .INSENSITIVE_ATTRIBUTE )
111116
112- // quasi-sensitive attriutes we set the
117+ // quasi-sensitive attributes we set the
113118 // hierarchies
114119 // the ethnicity hierarchy file
115120 val ethnicityHierarchyFile : File = new File (" /home/alex/qi3/drl_anonymity/data/hierarchies/ethnicity_hierarchy.csv" )
116121 data.getDefinition().setAttributeType(" ethnicity" , Hierarchy .create(ethnicityHierarchyFile,
117- StandardCharsets .UTF_8 , ';' ))/* AttributeType.QUASI_IDENTIFYING_ATTRIBUTE) */
122+ StandardCharsets .UTF_8 , ';' ))
118123
119124 // the salary hierarchy
120- // val salaryHierarchyFile: File = new File("/home/alex/qi3/drl_anonymity/data/hierarchies/salary_hierarchy.csv")
121125 data.getDefinition().setAttributeType(" salary" , buildSalaryHierarchy) // AttributeType.QUASI_IDENTIFYING_ATTRIBUTE)
122126
123-
124- // create the ethnicity hierarchy
125- // val ethnicityHierarchy = Hierarchy.create(ethnicityHierarchyFile,
126- // Charset.defaultCharset, ',')
127-
128127 // create the hierarchies for the ethnicity and
129128 // salary
130129 // Create an instance of the anonymizer
131130 val anonymizer = new ARXAnonymizer
132131 val config = ARXConfiguration .create
133132 config.addPrivacyModel(new KAnonymity (5 ))
134133 config.setSuppressionLimit(0.02d )
135-
134+ config.setQualityModel( Metric .createEntropyMetric())
136135
137136 // anonymize the data using K-anonymity
138137 val result = anonymizer.anonymize(data, config)
139138
139+ val optimum = result.getGlobalOptimum
140+
140141 // Print info
141142 printResult(result, data)
142143
144+ // this forks a new thread???
145+ System .out.println(" - Statistics" )
146+ System .out.println(result.getOutput(result.getGlobalOptimum, false ).getStatistics.getEquivalenceClassStatistics)
147+
148+ // save the dataset to disk
149+ // result.getOutput(optimum).save(dataFileKAnonymityDist, ',')
150+
143151 // Process results
144- System .out.println(" - Transformed data:" )
145- printHandle(handle = result.getOutput(false ))
152+ // System.out.println(" - Transformed data:")
153+ // printHandle(handle = result.getOutput(false))
146154 System .out.println(" Done!" )
147155
148156 }
149157
150158 // execute Experiment 1
151- // experiment1
159+ println(" =================================" )
160+ println(" Running Experiment 1" )
161+ experiment1
152162
163+ println(" =================================" )
164+ println(" Running K-anonymity" )
153165 // exploreHierarchy
154166 // run K-anonymity
155167 runKAnonimity
0 commit comments