
Commit c23ed14 (1 parent: fed4a67)

added a config option for test_decoders

Signed-off-by: Joshua Rosenkranz <jmrosenk@us.ibm.com>

File tree: 2 files changed, +33 −10 lines
tests/models/test_decoders.py (30 additions, 10 deletions)
```diff
@@ -22,7 +22,7 @@
     sample_sharegpt_requests,
     ids_for_prompt,
 )
-
+import json
 from aiu_fms_testing_utils.utils.aiu_setup import dprint, aiu_dist_setup
 
 import os
@@ -55,6 +55,9 @@
     "FMS_TEST_SHAPES_COMMON_MODEL_PATHS",
     [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_20B_CODE_INSTRUCT_8K, LLAMA_3p1_70B_INSTRUCT],
 )
+model_configuration_path = os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION", "")
+model_configuration_priority = os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION_PRIORITY", "0")
+
 # for validation level 1, the default is a failure rate of 1%
 # set this environment variable if you would like to relax that threshold
 failure_rate_threshold = os.environ.get("FMS_TEST_SHAPES_FAILURE_THRESHOLD", 0.01)
@@ -99,15 +102,6 @@
 if isinstance(common_max_new_tokens, str):
     common_max_new_tokens = [int(mnt) for mnt in common_max_new_tokens.split(",")]
 
-common_shapes = list(
-    itertools.product(
-        common_model_paths,
-        common_batch_sizes,
-        common_seq_lengths,
-        common_max_new_tokens,
-    )
-)
-
 # thresholds are chosen based on 1024 tokens per sequence
 # 1% error threshold rate between cpu fp32 and cuda fp16
 # if a models failure thresholds do not exist in this dict, default to the default_metrics_threshold defined above
@@ -146,6 +140,32 @@
         0.0044301633024588115,
     ),
 }
+
+if model_configuration_path != "":
+    print("ignoring FMS_TEST_SHAPES_COMMON_MODEL_PATHS, FMS_TEST_SHAPES_USE_MICRO_MODELS as configuration will be set by FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION")
+    USE_MICRO_MODELS = False
+    common_model_paths = []
+    priority = int(model_configuration_priority)
+    with open(model_configuration_path, 'r') as f:
+        for line in f:
+            try:
+                model_config = json.loads(line)
+                if model_config["priority"] <= priority:
+                    common_model_paths.append(model_config["model_id"])
+                    # assume fullsize models
+                    fail_thresholds[(model_config["model_id"], USE_MICRO_MODELS)] = (model_config["ce"], model_config["mean_diff"])
+            except json.JSONDecodeError:
+                print(f"config contained an improper json line: {line.strip()}")
+
+common_shapes = list(
+    itertools.product(
+        common_model_paths,
+        common_batch_sizes,
+        common_seq_lengths,
+        common_max_new_tokens,
+    )
+)
+
 # custom weight adaptation to be used in future. For instance if we would like to add some other adaptation, we can register it with this custom adapter
 # and provide it when converting from an aiu fms model's weights to a cpu fms model's weights. Currently this is only done for gptq, but may be done for other
 # formats in the future
```
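The net effect of these hunks: `common_shapes` is now built after the optional config block, so models selected from a JSONL configuration file feed directly into the test parametrization. Below is a minimal, self-contained sketch of the same selection flow (the temp file, entry names, and values are illustrative assumptions; only the environment-variable names and the priority filter come from the diff above):

```python
import json
import os
import tempfile

# Write a throwaway two-entry JSONL config (entries are made up for illustration).
entries = [
    {"model_id": "example/model-a", "ce": 2.5, "mean_diff": 0.0006, "priority": 1},
    {"model_id": "example/model-b", "ce": 2.8, "mean_diff": 0.0008, "priority": 2},
]
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    f.write("\n".join(json.dumps(e) for e in entries) + "\n")
    config_path = f.name

# The two environment variables introduced by this commit.
os.environ["FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION"] = config_path
os.environ["FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION_PRIORITY"] = "1"

# Read them back the same way test_decoders.py does.
max_priority = int(os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION_PRIORITY", "0"))

selected = []
with open(os.environ["FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION"], "r") as f:
    for line in f:
        try:
            model_config = json.loads(line)
        except json.JSONDecodeError:
            # Malformed lines are reported and skipped, mirroring the except branch above.
            print(f"config contained an improper json line: {line.strip()}")
            continue
        # An entry is selected only when its priority is <= the configured cutoff.
        if model_config["priority"] <= max_priority:
            selected.append(model_config["model_id"])

print(selected)  # prints ['example/model-a']: only the priority-1 entry passes the cutoff
```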
Second changed file, a new model configuration file (3 additions, 0 deletions):

```diff
@@ -0,0 +1,3 @@
+{"model_id": "mistralai/Mistral-7B-Instruct-v0.3", "ce": 2.8364005851745624, "mean_diff": 0.0007839603102183846, "priority": 2}
+/* FIXME: proper mean_diff, currently using from granite 3.1 8b instruct */
+{"model_id": "ibm-granite/granite-guardian-3.1-8b", "ce": 2.493684446811673, "mean_diff": 0.0005767398688476533, "priority": 1}
```
