2929common_seq_lengths = [64 ]
3030common_max_new_tokens = [128 ]
3131
32- output_dir = os .environ .get ("OUTPUT_PATH" , os .path .expanduser ("~/tmp/output" )
33- )
32+ output_dir = os .environ .get ("OUTPUT_PATH" , "/tmp/output" )
3433
3534# Pass a custom, comma-separated list of model paths via the environment, e.g.: export FMS_TESTING_COMMON_MODEL_PATHS="/tmp/models/granite-3-8b-base,/tmp/models/granite-7b-base"
3635if isinstance (common_model_paths , str ):
@@ -199,10 +198,18 @@ def post_hook_fn(module, input, output):
def write_csv(l, path, metric):
    """Write a metric to ``path`` as a single-column CSV file.

    The first line of the file is the ``metric`` name, acting as the column
    header. Each following line holds one value.

    Args:
        l: The value(s) to write. A list is written one element per line;
            any other object is written as a single row using its ``str``
            form.
        path: Destination file path. An existing file is overwritten.
        metric: Column header written on the first line.
    """
    # Normalise the scalar case so a single code path writes the rows.
    rows = l if isinstance(l, list) else [l]
    # The context manager closes the file; no explicit close() is needed.
    with open(path, 'w') as f:
        f.write(f'{metric}\n')
        f.writelines(f"{row}\n" for row in rows)
205207
def convert_tensor(output):
    """Return a zero-filled placeholder tensor sized from ``output``.

    The result has ``len(output)`` rows and one column per distinct element
    of ``output`` (elements must be hashable, since they are placed in a
    set).

    NOTE(review): the returned tensor contains only zeros; none of the values
    held in ``output`` are copied into it. Confirm this is the intended
    stand-in for tuple-valued layer outputs.

    Args:
        output: A sized iterable (the caller passes tuples) of hashable items.

    Returns:
        A ``torch.Tensor`` of zeros with shape
        ``(len(output), number_of_distinct_elements)``.
    """
    distinct_count = len(set(output))
    return torch.zeros(size=(len(output), distinct_count))
212+
206213def generate_layers_metrics (model_path , batch_size , seq_length , max_new_tokens ):
207214 torch .manual_seed (42 )
208215 os .environ ["COMPILATION_MODE" ] = "offline_decoder"
@@ -249,43 +256,46 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
249256 tokenizer = tokenizer )
250257
251258 absolute_differences = []
259+ mean_diff_list = []
260+ median_diff_list = []
261+ abs_diff_list = []
252262
253263 assert len (layer_stack_cuda ) == len (layer_stack_cpu )
254264
255- for layer , cuda_out in layer_stack_cuda :
265+ for layer , cuda_output in layer_stack_cuda :
256266 for cpu_layer , cpu_output in layer_stack_cpu :
257267 if cpu_layer == layer :
258268 print ("CPU Layer {} GPU Layer {}" .format (cpu_layer , layer ))
259269
260- if not type (cuda_out ) is tuple :
261- tensor_cpu_cuda_out = cuda_out .to (torch .device ('cpu' ))
270+ if not type (cuda_output ) is tuple :
271+ tensor_cuda_out = cuda_output .to (torch .device ('cpu' ))
262272 else :
263- cuda_out_unique = set (cuda_out )
264- keys = {key : value for key , value in zip (cuda_out_unique , range (len (cuda_out_unique )))}
265- tensor_cpu_cuda_out = torch .zeros (size = (len (cuda_out ), len (keys )))
266-
267- abs_diff = torch .abs (cpu_output - tensor_cpu_cuda_out ).flatten ().tolist ()
268- absolute_differences .extend (abs_diff )
273+ tensor_cuda_out = convert_tensor (cuda_output )
274+ if type (cpu_output ) is tuple :
275+ tensor_cpu_out = convert_tensor (cpu_output )
276+ else :
277+ tensor_cpu_out = cpu_output
269278
279+ abs_diff = torch .abs (tensor_cpu_out - tensor_cuda_out ).flatten ().tolist ()
280+ absolute_differences .extend (abs_diff )
270281 if len (absolute_differences ) == 0 :
271282 abs_diff = {"mean" : float ('nan' ), "median" : float ('nan' ), "q1" : float ('nan' ), "q3" : float ('nan' )}
272283
273- abs_diff_tensor = torch .tensor (absolute_differences )
274- abs_diff_tensor = torch .nan_to_num (abs_diff_tensor , nan = 0.0 )
275- mean_diff = torch .mean (abs_diff_tensor ).item ()
276- median_diff = torch .median (abs_diff_tensor ).item ()
284+ abs_diff_tensor = torch .tensor (absolute_differences )
285+ abs_diff_tensor = torch .nan_to_num (abs_diff_tensor , nan = 0.0 )
286+ mean_diff = torch .mean (abs_diff_tensor ).item ()
287+ median_diff = torch .median (abs_diff_tensor ).item ()
288+
289+ prefix = get_default_validation_prefix (model_id , max_new_token , batch_size , 0 , 'float16' )
277290
278- return abs_diff , mean_diff , median_diff
291+ write_csv (abs_diff , os .path .join (output_dir , f"{ prefix } --{ layer } .abs_diff.csv" ), "abs_diff" )
292+ write_csv (mean_diff , os .path .join (output_dir , f"{ prefix } --{ layer } .mean_diff.csv" ), "mean_diff" )
293+ write_csv (median_diff , os .path .join (output_dir , f"{ prefix } --{ layer } .median_diff.csv" ), "median_diff" )
294+
295+ print (f"Completed { model_id } layers' metrics generation" )
279296
# Generate the per-layer metric files for every configured
# (model, batch size, sequence length, max new tokens) combination.
for model_id, batch_size, sequence_length, max_new_token in common_shapes:
    print("testing ", "model_id-", model_id, ", max_new_tokens-", max_new_token, ", batch_size-", batch_size, ", seq_length-", sequence_length)
    # generate_layers_metrics writes its CSV files itself and no longer
    # returns the metrics, so its result must not be unpacked here
    # (unpacking None would raise TypeError).
    generate_layers_metrics(model_path=model_id, batch_size=batch_size, seq_length=sequence_length, max_new_tokens=max_new_token)
283300
284- prefix = get_default_validation_prefix (model_id , max_new_token , batch_size , 0 , 'float16' )
285- if os .path .exists (os .path .join (output_dir , f"{ prefix } .abs_diff.csv" )):
286- print ("skipping metric generation as it has already been done" )
287- exit (0 )
288- write_csv (abs_diff , os .path .join (output_dir , f"{ prefix } .abs_diff.csv" ), "abs_diff" )
289- write_csv (mean_diff , os .path .join (output_dir , f"{ prefix } .mean_diff.csv" ), "mean_diff" )
290- write_csv (median_diff , os .path .join (output_dir , f"{ prefix } .median_diff.csv" ), "median_diff" )
291301
0 commit comments