33
44import itertools
55import torch
6+ import torch .nn as nn
67
78from fms .utils import tokenizers
89from fms .models import get_model
2425 "SHARE_GPT_DATASET_PATH" , os .path .expanduser ("~/share_gpt.json" )
2526)
2627
27- common_model_paths = "ibm-granite/granite-3.2-8b-instruct"
28- common_batch_sizes = [1 ]
29- common_seq_lengths = [64 ]
30- common_max_new_tokens = [128 ]
28+ common_model_paths = common_model_paths = os .environ .get (
29+ "MODEL_PATHS" ,
30+ ["ibm-granite/granite-3.2-8b-instruct" ],
31+ )
32+ common_batch_sizes = os .environ .get ("BATCH_SIZES" , [1 , 2 , 4 , 8 ])
33+ common_seq_lengths = os .environ .get ("SEQ_LENGTHS" , [64 , 2048 ])
34+ common_max_new_tokens = os .environ .get ("MAX_NEW_TOKENS" , [128 ])
3135
3236output_dir = os .environ .get ("OUTPUT_PATH" , "/tmp/output" )
3337
@@ -196,17 +200,20 @@ def post_hook_fn(module, input, output):
196200 return layer_stack
197201
def write_csv(l, path, metric):
    """Write *metric* as a one-line header to *path*, then the value(s) in *l*.

    Args:
        l: a single float, or an iterable of values (one written per line).
        path: destination file path (overwritten).
        metric: metric name written as the first line.
    """
    # `with` closes the file on exit; the explicit f.close() the original
    # carried was redundant. isinstance() replaces `not type(l) is float`,
    # and the debug prints are dropped.
    with open(path, "w") as f:
        f.write(f"{metric}\n")
        if isinstance(l, float):
            f.write(f"{l}\n")
        else:
            for t in l:
                f.write(f"{t}\n")
207214
def convert_tensor(output):
    """Return a zero tensor of shape (len(output), num_unique_tokens).

    *output* is a nested sequence (e.g. list of token lists); the column
    count is the number of distinct tokens across all rows. NOTE(review):
    the result is all zeros — only its shape carries information; presumably
    the caller just needs a same-shaped placeholder for the diff. Confirm
    before relying on the values.
    """
    # set() consumes the flattened iterator directly — the intermediate
    # list() the original built was unnecessary, and the {token: index}
    # dict was used only for its length, which equals len(out_unique).
    out_unique = set(itertools.chain.from_iterable(output))
    return torch.zeros(size=(len(output), len(out_unique)))
212219
@@ -256,9 +263,6 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
256263 tokenizer = tokenizer )
257264
258265 absolute_differences = []
259- mean_diff_list = []
260- median_diff_list = []
261- abs_diff_list = []
262266
263267 assert len (layer_stack_cuda ) == len (layer_stack_cpu )
264268
@@ -275,27 +279,26 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
275279 tensor_cpu_out = convert_tensor (cpu_output )
276280 else :
277281 tensor_cpu_out = cpu_output
278-
282+ print ( "tensor converted... get torch abs diff" )
279283 abs_diff = torch .abs (tensor_cpu_out - tensor_cuda_out ).flatten ().tolist ()
280- absolute_differences .extend (abs_diff )
281- if len (absolute_differences ) == 0 :
282- abs_diff = {"mean" : float ('nan' ), "median" : float ('nan' ), "q1" : float ('nan' ), "q3" : float ('nan' )}
283-
284- abs_diff_tensor = torch .tensor (absolute_differences )
285- abs_diff_tensor = torch .nan_to_num (abs_diff_tensor , nan = 0.0 )
286- mean_diff = torch .mean (abs_diff_tensor ).item ()
287- median_diff = torch .median (abs_diff_tensor ).item ()
284+ cos = nn .CosineSimilarity ()
285+ cos_sim = cos (tensor_cpu_out - tensor_cuda_out )
286+
287+ print ("abs_diff and cos_sim calculated" )
288+ absolute_differences .append (abs_diff )
289+ print ("abs_diff list extended" )
288290
289291 prefix = get_default_validation_prefix (model_id , max_new_token , batch_size , 0 , 'float16' )
292+ layer_name = str (layer ).replace ('[' ,'' ).replace (']' , '' )
290293
291- write_csv ( abs_diff , os . path . join ( output_dir , f" { prefix } -- { layer } .abs_diff.csv" ), "abs_diff " )
292- write_csv (mean_diff , os .path .join (output_dir , f"{ prefix } --{ layer } .mean_diff .csv" ), "mean_diff " )
293- write_csv (median_diff , os .path .join (output_dir , f"{ prefix } --{ layer } .median_diff .csv" ), "median_diff " )
294+ print ( "saving files " )
295+ write_csv (abs_diff , os .path .join (output_dir , f"{ prefix } --{ layer_name } .abs_diff .csv" ), "abs_diff " )
296+ write_csv (cos_sim , os .path .join (output_dir , f"{ prefix } --{ layer_name } .cos_sim .csv" ), "cos_sim " )
294297
295298 print (f"Completed { model_id } layers' metrics generation" )
296299
# Driver: run the per-layer CPU-vs-GPU metrics generation for every
# (model, batch size, sequence length, max_new_tokens) combination in
# common_shapes. NOTE(review): common_shapes and generate_layers_metrics
# are defined elsewhere in this file — presumably common_shapes is the
# itertools.product of the common_* lists above; confirm against the
# full file. generate_layers_metrics writes its CSV outputs as a side
# effect and returns nothing, so no value is captured here.
for model_id, batch_size, sequence_length, max_new_token in common_shapes:
    print("testing ", "model_id-", model_id, ", max_new_tokens-", max_new_token, ", batch_size-", batch_size, ", seq_length-", sequence_length)
    generate_layers_metrics(model_path=model_id, batch_size=batch_size, seq_length=sequence_length, max_new_tokens=max_new_token)
300303
301304
0 commit comments