Commit bf1e809

Saves layer output stack
Signed-off-by: Flavia Beo <flavia.beo@ibm.com>
1 parent 3d30a85 commit bf1e809

File tree

1 file changed: +8 -1 lines changed

scripts/generate_layers_metrics.py

Lines changed: 8 additions & 1 deletion
@@ -1,4 +1,5 @@
 import os
+import json
 import time
 import logging
 import argparse
@@ -205,7 +206,7 @@ def __register_call_layers(model, batch_size, device, seq_length, max_new_tokens
         tokenizer (Tokenizer): The tokenizer used for tokenization.
 
     Returns:
-        list: A list of tuples containing the name and output of each layer in the model.
+        dict: A dict containing the name and output of each layer in the model.
     """
     layer_stack = {}
     pt_compile_model_time = time.time()
@@ -389,6 +390,9 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                            seq_length=seq_length, max_new_tokens=max_new_tokens,
                                            tokenizer=tokenizer)
 
+    with open(os.path.join(output_path, f"{model_path}-layer-output-stack-cpu.json"), 'w') as f:
+        json.dump(layer_stack_cpu, f)
+
     global generate_iters
     generate_iters = 0
     logger.info(f"Finished registering CPU layers")
@@ -398,6 +402,9 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                             device="cuda",
                                             seq_length=seq_length, max_new_tokens=max_new_tokens,
                                             tokenizer=tokenizer)
+
+    with open(os.path.join(output_path, f"{model_path}-layer-output-stack-gpu.json"), 'w') as f:
+        json.dump(layer_stack_cuda, f)
 
     assert len(layer_stack_cuda.keys()) == len(layer_stack_cpu.keys())
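The two new json.dump calls persist the per-layer output dict that __register_call_layers returns, once for the CPU pass and once for the CUDA pass, so the captured layer outputs can be inspected offline. Below is a minimal sketch (not part of the commit) of reading those files back and repeating the key check from the diff; the output_path value and the model name are purely illustrative, and it assumes the stored layer outputs were JSON-serializable when they were dumped.

import json
import os

output_path = "./output"   # assumed output directory for this example
model_path = "my-model"    # hypothetical model name, matches the file-name pattern above

cpu_file = os.path.join(output_path, f"{model_path}-layer-output-stack-cpu.json")
gpu_file = os.path.join(output_path, f"{model_path}-layer-output-stack-gpu.json")

# Reload the layer-output stacks written by generate_layers_metrics
with open(cpu_file) as f:
    layer_stack_cpu = json.load(f)
with open(gpu_file) as f:
    layer_stack_cuda = json.load(f)

# Mirrors the assert at the end of the diff: both device runs should have
# captured the same set of layer names.
assert layer_stack_cuda.keys() == layer_stack_cpu.keys()
print(f"Loaded {len(layer_stack_cpu)} layer outputs per device")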