@@ -6,18 +6,42 @@ package examples.example_3
66
77
88import base .DefaultConfiguration
9- import org .deidentifier .arx .Data
10- import postprocessor .ResultPrinter .printHandleTop
11-
12- // import scala.collection.JavaConversions._
13- // import collection.convert.ImplicitConversionsToScala.map AsScala
14- import collection .JavaConverters .* // asScala
9+ import org .deidentifier .arx .AttributeType .{Hierarchy , listMicroAggregationFunctions }
10+ import org .deidentifier .arx .aggregates .AggregateFunction
11+ import org .deidentifier .arx .aggregates .AggregateFunction .AggregateFunctionBuilder
12+ // import org.deidentifier.arx.aggregates.AggregateFunction.AggregateFunctionBuilder.*
13+ import org .deidentifier .arx .aggregates .HierarchyBuilderIntervalBased
14+ import org .deidentifier .arx .criteria .KAnonymity
15+ import org .deidentifier .arx .{ARXAnonymizer , ARXConfiguration , AttributeType , Data , DataType }
16+
17+ import java .lang
18+ import collection .JavaConverters .*
1519import collection .convert .ImplicitConversions .*
1620import java .io .File
17- import java .nio .charset .Charset
21+ import java .nio .charset .{Charset , StandardCharsets }
22+ import postprocessor .ResultPrinter .{printHandle , printHandleTop , printResult }
23+
1824
1925object MeasureDataQuality extends App {
2026
/**
 * Builds the interval-based hierarchy builder used for the salary attribute.
 *
 * The builder is created for `DataType.DECIMAL` and receives nine contiguous
 * intervals whose bounds run from 0.0 to 2.0. Every interval gets its own
 * "arithmetic mean of bounds" aggregate function.
 *
 * @return the populated [[HierarchyBuilderIntervalBased]] over `java.lang.Double`
 */
def buildSalaryHierarchy: HierarchyBuilderIntervalBased[lang.Double] = {

  val salaryHierarchy: HierarchyBuilderIntervalBased[lang.Double] =
    HierarchyBuilderIntervalBased.create(DataType.DECIMAL)
  val aggregateFunctionBuilder = AggregateFunction.forType(DataType.DECIMAL)

  // Interval boundaries in ascending order. The literals are kept verbatim
  // (rather than computed as k * 2.0 / 9) so the exact floating-point bounds
  // stay unchanged. Boxing goes through Double.valueOf because the
  // java.lang.Double(double) constructor is deprecated for removal.
  val bounds: Seq[lang.Double] = Seq(
    0.0,
    0.2222222222222222,
    0.4444444444444444,
    0.6666666666666666,
    0.8888888888888888,
    1.1111111111111112,
    1.3333333333333333,
    1.5555555555555554,
    1.7777777777777777,
    2.0
  ).map(bound => lang.Double.valueOf(bound))

  // Each pair of neighbouring bounds forms one interval; a fresh aggregate
  // function is created per interval, as before.
  bounds.zip(bounds.tail).foreach { case (lower, upper) =>
    salaryHierarchy.addInterval(
      lower,
      upper,
      aggregateFunctionBuilder.createArithmeticMeanOfBoundsFunction()
    )
  }

  salaryHierarchy
}
44+
2145 def loadData : Tuple2 [Data , Data ] = {
2246
2347 val dataFileOrg : File = new File (" /home/alex/qi3/drl_anonymity/src/examples/q_learn_distorted_sets/distorted_set_-1" )
@@ -64,14 +88,70 @@ object MeasureDataQuality extends App{
6488
/**
 * Runs a k-anonymity (k = 5) anonymization over the normalized-salary mock
 * subjects CSV and prints the outcome.
 *
 * Steps, in order: load the CSV, print the top 5 rows, mark the identifying
 * and insensitive attributes, attach the ethnicity and salary hierarchies,
 * anonymize with a suppression limit of 0.02, then print the result summary
 * and the transformed data.
 *
 * NOTE(review): string literals are reproduced exactly as they appear in this
 * view, including the leading space inside each one -- confirm against the
 * real file paths and CSV column headers.
 */
def runKAnonimity: Unit = {

  // Load the input data set (comma separated, platform default charset).
  val dataFile: File = new File(" /home/alex/qi3/drl_anonymity/data/hierarchies/normalized_salary_mocksubjects.csv")
  val data: Data = Data.create(dataFile, Charset.defaultCharset, ',')

  printHandleTop(handle = data.getHandle, n = 5)

  // Columns flagged as identifying attributes.
  val identifyingColumns = Seq(
    " preventative_treatment",
    " gender",
    " education",
    " mutation_status",
    " NHSno",
    " given_name",
    " surname",
    " dob"
  )
  identifyingColumns.foreach { column =>
    data.getDefinition().setAttributeType(column, AttributeType.IDENTIFYING_ATTRIBUTE)
  }

  // The diagnosis column is kept as an insensitive attribute.
  data.getDefinition().setAttributeType(" diagnosis", AttributeType.INSENSITIVE_ATTRIBUTE)

  // Ethnicity: hierarchy read from a ';'-separated UTF-8 file.
  val ethnicityHierarchyFile: File = new File(" /home/alex/qi3/drl_anonymity/data/hierarchies/ethnicity_hierarchy.csv")
  val ethnicityHierarchy = Hierarchy.create(ethnicityHierarchyFile, StandardCharsets.UTF_8, ';')
  data.getDefinition().setAttributeType(" ethnicity", ethnicityHierarchy)

  // Salary: interval-based hierarchy built in code.
  data.getDefinition().setAttributeType(" salary", buildSalaryHierarchy)

  // Configure the anonymizer: 5-anonymity with a 0.02 suppression limit.
  val anonymizer = new ARXAnonymizer
  val config = ARXConfiguration.create
  config.addPrivacyModel(new KAnonymity(5))
  config.setSuppressionLimit(0.02d)

  // Anonymize and report.
  val result = anonymizer.anonymize(data, config)
  printResult(result, data)

  System.out.println(" - Transformed data:")
  printHandle(handle = result.getOutput(false))
  System.out.println(" Done!")
}
73149
74150 // execute Experiment 1
75- experiment1
151+ // experiment1
152+
153+ // exploreHierarchy
154+ // run K-anonymity
155+ runKAnonimity
76156
77157}
0 commit comments