11from relation_features import make_data_from
2+ from utils import make_csv_from_json
23from train import test
34import numpy as np
45import pandas as pd
@@ -16,6 +17,7 @@ def create_similarity_matrix(pth,preds,pred_labels_list,strategy="one-to-many"):
1617 """
1718 Create a similarity matrix from the prediction
1819 """
20+ predicted_pairs = []
1921 preds = np .array (preds )
2022 preds = np .mean (preds ,axis = 0 )
2123 pred_labels_list = np .array (pred_labels_list )
@@ -28,6 +30,7 @@ def create_similarity_matrix(pth,preds,pred_labels_list,strategy="one-to-many"):
2830 df2_cols = df2 .columns
2931 # create similarity matrix for pred values
3032 preds_matrix = np .array (preds ).reshape (len (df1_cols ),len (df2_cols ))
33+ # build the matrix of predicted labels according to the selected matching strategy
3134 if strategy == "one-to-many" :
3235 pred_labels_matrix = np .array (pred_labels ).reshape (len (df1_cols ),len (df2_cols ))
3336 elif strategy == "one-to-one" :
@@ -41,11 +44,19 @@ def create_similarity_matrix(pth,preds,pred_labels_list,strategy="one-to-many"):
4144 pred_labels_matrix [i ,j ] = 1
4245 df_pred = pd .DataFrame (preds_matrix ,columns = df2_cols ,index = df1_cols )
4346 df_pred_labels = pd .DataFrame (pred_labels_matrix ,columns = df2_cols ,index = df1_cols )
44- return df_pred ,df_pred_labels
47+ for i in range (len (df_pred_labels )):
48+ for j in range (len (df_pred_labels .iloc [i ])):
49+ if df_pred_labels .iloc [i ,j ] == 1 :
50+ predicted_pairs .append ((df_pred .index [i ],df_pred .columns [j ],df_pred .iloc [i ,j ]))
51+ return df_pred ,df_pred_labels ,predicted_pairs
4552
4653if __name__ == '__main__' :
4754 pth = args .path
4855 model_pth = args .model
56+ # convert every .json / .jsonl file in the input directory to CSV
57+ for file in os .listdir (args .path ):
58+ if file .endswith ('.json' ) or file .endswith ('.jsonl' ):
59+ make_csv_from_json (pth + "/" + file )
4960
5061 features ,_ = make_data_from (pth ,"test" )
5162 preds = []
@@ -64,6 +75,9 @@ def create_similarity_matrix(pth,preds,pred_labels_list,strategy="one-to-many"):
6475 pred_labels_list .append (pred_labels )
6576 del bst
6677
67- df_pred ,df_pred_labels = create_similarity_matrix (pth ,preds ,pred_labels_list ,strategy = args .strategy )
78+ df_pred ,df_pred_labels , predicted_pairs = create_similarity_matrix (pth ,preds ,pred_labels_list ,strategy = args .strategy )
6879 df_pred .to_csv (pth + "/similarity_matrix_value.csv" ,index = True )
69- df_pred_labels .to_csv (pth + "/similarity_matrix_label.csv" ,index = True )
80+ df_pred_labels .to_csv (pth + "/similarity_matrix_label.csv" ,index = True )
81+
82+ for pair_tuple in predicted_pairs :
83+ print (pair_tuple )
0 commit comments