@@ -267,26 +267,31 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
267267 assert len (layer_stack_cuda ) == len (layer_stack_cpu )
268268
269269 for layer , cuda_output in layer_stack_cuda :
270+ tensor_cuda_out = None
271+ tensor_cpu_out = None
272+ abs_diff = None
270273 for cpu_layer , cpu_output in layer_stack_cpu :
271274 if cpu_layer == layer :
272275 print ("CPU Layer {} GPU Layer {}" .format (cpu_layer , layer ))
273276
274277 if not type (cuda_output ) is tuple :
275- tensor_cuda_out = cuda_output . to ( torch . device ( 'cpu' ))
278+ tensor_cuda_out = cuda_output
276279 else :
277280 tensor_cuda_out = convert_tensor (cuda_output )
278281 if type (cpu_output ) is tuple :
279282 tensor_cpu_out = convert_tensor (cpu_output )
280283 else :
281- tensor_cpu_out = cpu_output
284+ tensor_cpu_out = cpu_output . to ( 'cuda' )
282285 print ("tensor converted... get torch abs diff" )
283286 abs_diff = torch .abs (tensor_cpu_out - tensor_cuda_out ).flatten ().tolist ()
284- cos = nn .CosineSimilarity ()
285- cos_sim = cos (tensor_cpu_out - tensor_cuda_out )
286-
287- print ("abs_diff and cos_sim calculated" )
287+ print ("abs_diff calculated" )
288+ cos = nn .CosineSimilarity (dim = 1 )
289+ cos_sim = cos (tensor_cpu_out , tensor_cuda_out )
290+ print (cos_sim )
291+
288292 absolute_differences .append (abs_diff )
289- print ("abs_diff list extended" )
293+ print ("abs_diff list appended" )
294+ print (len (absolute_differences ))
290295
291296 prefix = get_default_validation_prefix (model_id , max_new_token , batch_size , 0 , 'float16' )
292297 layer_name = str (layer ).replace ('[' ,'' ).replace (']' , '' )
0 commit comments