Merge pull request gousiosg#27 from bitslab/alekh

ameka4 · web-flow · commit e6577e594fe0 · 2022-11-22T16:53:32.000-06:00
Updated RQ4 scripts for rpki-commons
diff --git a/artifacts/experiments/RQ4/generateResults.py b/artifacts/experiments/RQ4/generateResults.py
@@ -6,11 +6,12 @@
 import pandas as pd
 
 BASE_RESULT_DIR = "artifacts/results/"
-PROJECTS = ["jflex", "convex", "mph-table"]
+PROJECTS = ["convex", "jflex", "mph-table", "rpki-commons"]
 REPORT_NAME = "artifacts/output/rq4.csv"
 TEX_REPORT_NAME = "artifacts/output/rq4.tex"
 
-CALC_NAMES = ['Vanilla', 'Improved']
+RAW_NAMES = ['Vanilla', 'Improved']
+CALC_NAMES = ['Vanilla', 'Improved', 'Overhead']
 
 propertyShortNames = {
     "TestSmartByteSerializer#canRoundTripBytes": 'byte',
@@ -31,7 +32,13 @@
     "CharClassesQuickcheck#addString": 'addString',
     "StateSetQuickcheck#addStateDoesNotRemove": 'add',
     "StateSetQuickcheck#containsElements": 'contains',
-    "StateSetQuickcheck#removeAdd": 'remove'
+    "StateSetQuickcheck#removeAdd": 'remove',
+    "RoaCMSBuilderPropertyTest#buildEncodedParseCheck": 'roa',
+    "ManifestCMSBuilderPropertyTest#buildEncodedParseCheck": 'manifest',
+    "AspaCmsTest#should_generate_aspa": 'aspa',
+    "X509ResourceCertificateParentChildValidatorTest#validParentChildSubResources": 'resources',
+    "X509ResourceCertificateParentChildValidatorTest#validParentChildOverClaiming": 'claiming',
+    "X509ResourceCertificateParentChildValidatorTest#validParentChildOverClaimingLooseValidation": 'loose'
 }
 
 row_count = 1
@@ -45,6 +52,8 @@ def filter_for_recent_results(project_name: str, stats_directories: list[str]) -
     project_string = project_name if project_name != "convex" else project_name + "-core"  # edge case
     if "mph-table-fixed" in stats_directories[0]:  # edge case
         project_string = "mph-table-fixed"
+    elif "rpki-commons-fixed" in stats_directories[0]:
+        project_string = "rpki-commons-fixed"
     time_stamps = [datetime.datetime.strptime(x.replace(project_string, "").replace("_", ":").replace("T", " "), "%Y-%m-%d %H:%M:%S.%f")
                    for x in stats_directories]
     time_stamps.sort()
@@ -88,11 +97,14 @@ def generate_report_stats(stat_values: dict[str, dict]) -> dict[str, str]:
     property_dict = {}
     for key in first_iteration:
         property_dict[key] = []
-
+    
     # populate the dictionary with our results
     for key, val in stat_values.items():
         for prop, time in val.items():
             property_array = property_dict.get(prop)
+            if property_array is None:
+                property_dict[prop] = []
+                property_array = property_dict.get(prop)
             property_array.append(time)
 
     # generate mean, standard deviation and populate our final object
@@ -141,7 +153,7 @@ def main():
         fixed_stats_directories = obtain_stats_directories(results_directory=fixed_results_directory)
         evaluated_fixed_runs = filter_for_recent_results(project_name=project_name, stats_directories=fixed_stats_directories)
         fixed_raw_stats = evaluate_directories(project_name=fixed_project_name, results_directory=fixed_results_directory, directories=evaluated_fixed_runs)
-
+        
         # obtain mean/st dev
         final_stats = generate_report_stats(stat_values=raw_stats)
         final_fixed_stats = generate_report_stats(stat_values=fixed_raw_stats)
@@ -153,21 +165,24 @@ def main():
         df = pd.DataFrame()
         for project in PROJECTS:
             final_dataset[project]['_style'] = ''
-            proj_mean_and_std = final_dataset[project][CALC_NAMES].copy()
+            proj_mean_and_std = final_dataset[project][RAW_NAMES].copy()
             vanilla_mean = pd.DataFrame(proj_mean_and_std['Vanilla'].apply(lambda v: float(v.split(" \u00B1 ")[0]) if
                                                                 " \u00B1 " in str(v) else np.nan)).reset_index()
             improved_mean = pd.DataFrame(proj_mean_and_std['Improved'].apply(lambda v: float(v.split(" \u00B1 ")[0]) if
                                                                 " \u00B1 " in str(v) else np.nan)).reset_index()
 
-            proj_stats = pd.merge(vanilla_mean.copy(), improved_mean.copy(), how='outer', on='index')[CALC_NAMES]
-            final_dataset[project]['Difference'] = proj_stats[['Vanilla', 'Improved']].pct_change(axis='columns')['Improved']
-            proj_mean = pd.merge(vanilla_mean, improved_mean, how='outer', on='index')[CALC_NAMES].mean()
+            proj_stats = pd.merge(vanilla_mean, improved_mean, how='outer', on='index')[RAW_NAMES].reset_index()
+
+            final_dataset[project]['Overhead'] = proj_stats[['Improved']].values / proj_stats[['Vanilla']].values
+            overhead_stats = final_dataset[project]['Overhead'].copy().reset_index()
+
+            proj_mean = pd.merge(proj_stats, overhead_stats, how='outer', on='index')[CALC_NAMES].mean()
             proj_mean['_style'] = 'BOLD'
             proj_mean['N'] = ''
             proj_mean['Property'] = 'Average'
             final_dataset[project].loc['mean'] = proj_mean
 
-            header = dict(zip(['N', 'Property', 'Vanilla', 'Improved', 'Difference'], ['', '', '', '', '']))
+            header = dict(zip(['N', 'Property', 'Vanilla', 'Improved', 'Overhead'], ['', '', '', '', '']))
             df = pd.concat([
                 df,
                 pd.DataFrame(header | {'_style': 'HEADER', 'Property': project}, index=[0]),
@@ -203,6 +218,5 @@ def main():
         tf.write(outTable)
 
 
-
 if __name__ == "__main__":
     main()
diff --git a/artifacts/output/rq4.tex b/artifacts/output/rq4.tex
@@ -0,0 +1,44 @@
+\begin{tabular}{lllll}
+N & Property & Vanilla & Improved & Overhead \\
+\hline
+\multicolumn{5}{c}{convex} \\
+\hline
+1 & message & 45.22 ± 3.36 & 42.80 ± 2.70 & 0.95 \\
+2 & data & 53.02 ± 1.99 & 53.92 ± 0.00 & 1.02 \\
+3 & primitive & 48.52 ± 0.88 & 50.12 ± 0.00 & 1.03 \\
+\textbf{} & \textbf{Average} & \textbf{48.92} & \textbf{48.95} & \textbf{1.00} \\
+\hline
+\multicolumn{5}{c}{jflex} \\
+\hline
+1 & remove & 41.50 ± 6.41 & 31.56 ± 5.16 & 0.76 \\
+2 & addSingleton & 44.89 ± 0.77 & 51.57 ± 0.79 & 1.15 \\
+3 & contains & 43.82 ± 2.12 & 42.61 ± 1.68 & 0.97 \\
+4 & addSet & 44.56 ± 0.94 & 52.82 ± 0.87 & 1.19 \\
+5 & add & 43.06 ± 1.84 & 43.10 ± 1.80 & 1.00 \\
+6 & addString & 44.93 ± 1.64 & 51.53 ± 1.59 & 1.15 \\
+7 & addSingle & 44.40 ± 2.21 & 53.14 ± 1.53 & 1.20 \\
+\textbf{} & \textbf{Average} & \textbf{43.88} & \textbf{46.62} & \textbf{1.06} \\
+\hline
+\multicolumn{5}{c}{mph-table} \\
+\hline
+1 & list & 13.49 ± 0.98 & 13.19 ± 0.94 & 0.98 \\
+2 & optionals & 12.75 ± 0.76 & 13.17 ± 1.08 & 1.03 \\
+3 & short & 12.68 ± 0.67 & 13.16 ± 0.58 & 1.04 \\
+4 & int & 13.09 ± 0.81 & 12.79 ± 1.00 & 0.98 \\
+5 & pair & 12.76 ± 0.81 & 13.16 ± 1.09 & 1.03 \\
+6 & byte & 12.57 ± 0.79 & 12.85 ± 1.04 & 1.02 \\
+7 & long & 12.80 ± 0.75 & 13.04 ± 0.91 & 1.02 \\
+8 & string & 12.73 ± 0.60 & 12.99 ± 0.97 & 1.02 \\
+9 & list* & nan & 27.19 ± 3.60 & nan \\
+\textbf{} & \textbf{Average} & \textbf{12.86} & \textbf{14.62} & \textbf{1.01} \\
+\hline
+\multicolumn{5}{c}{rpki-commons} \\
+\hline
+1 & claiming & 25.33 ± 2.37 & nan & nan \\
+2 & aspa & 23.42 ± 1.72 & nan & nan \\
+3 & resources & 24.18 ± 1.60 & 26.14 ± 0.28 & 1.08 \\
+4 & roa & 24.29 ± 3.24 & nan & nan \\
+5 & manifest & 23.41 ± 2.44 & nan & nan \\
+6 & loose & 25.48 ± 2.62 & nan & nan \\
+\textbf{} & \textbf{Average} & \textbf{24.35} & \textbf{26.14} & \textbf{1.08} \\
+\end{tabular}
diff --git a/run_experiment_rq4.sh b/run_experiment_rq4.sh
@@ -1,6 +1,6 @@
 GEN_STATS="artifacts/experiments/RQ4/generateStats.py"
 GEN_RESULTS="artifacts/experiments/RQ4/generateResults.py"
-for PROJECT in mph-table convex jflex
+for PROJECT in convex jflex mph-table rpki-commons
 do
   echo Generating statistics for $PROJECT
   python3 $GEN_STATS $PROJECT