Commit 719f29d

Max length for hf fixed
Signed-off-by: Flavia Beo <flavia.beo@ibm.com>
1 parent 894ebd6

File tree

1 file changed: +4 -2 lines changed

scripts/generate_layers_metrics.py

Lines changed: 4 additions & 2 deletions
@@ -151,7 +151,9 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
     if "cuda" in device:
         ids = ids.to("cuda")
 
-    if hasattr(model.config, "ntk_scaling") and model.config.ntk_scaling:
+    if args.model_loader == "hf":
+        max_seq_len = max_len
+    elif hasattr(model.config, "ntk_scaling") and model.config.ntk_scaling:
         max_seq_len = max(max_len, model.config.max_expected_seq_len)
     else:
         # without ntk scaling, extending the seq length too far gives bogus results.
@@ -174,7 +176,7 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
         )
         if args.model_loader == "hf":
             result = model.generate(ids,
-                                    max_length=max_len,
+                                    max_length=max_seq_len,
                                     max_new_tokens=max_new_token,
                                     do_sample=do_sample,
                                     use_cache=use_cache)
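For readers skimming the diff, the selection logic it introduces can be read as a standalone sketch. The names `model_loader`, `max_len`, `ntk_scaling`, and `max_expected_seq_len` mirror the script; the function name `select_max_seq_len` and the fallback cap value are hypothetical, since the first hunk is truncated before the `else` branch assigns `max_seq_len`.

# Minimal sketch of the sequence-length selection added by this commit,
# assuming the names used in scripts/generate_layers_metrics.py.
def select_max_seq_len(model_loader, config, max_len, fallback_cap=2048):
    if model_loader == "hf":
        # HF loader: use the requested max_len directly; generate() now
        # receives this value via max_length=max_seq_len.
        return max_len
    if getattr(config, "ntk_scaling", False):
        # With NTK scaling the model can run past its trained context length.
        return max(max_len, config.max_expected_seq_len)
    # Without ntk scaling, extending the seq length too far gives bogus
    # results, so clamp to a cap (value assumed for illustration).
    return min(max_len, fallback_cap)

Handling the `hf` case first guarantees `max_seq_len` is defined before the `model.generate(...)` call in the second hunk, which previously passed the raw `max_len` regardless of the branch taken above.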
