Skip to content

Commit 596baca

Browse files
authored
Merge pull request #60 from foundation-model-stack/test_decoders_configuration
added an option to run test_decoders from a model configuration file
2 parents 1a77f63 + 75fe897 commit 596baca

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

tests/models/test_decoders.py

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
sample_sharegpt_requests,
2323
ids_for_prompt,
2424
)
25-
25+
import json
2626
from aiu_fms_testing_utils.utils.aiu_setup import dprint, aiu_dist_setup
2727

2828
import os
@@ -83,6 +83,9 @@
8383
LLAMA_3p1_70B_INSTRUCT,
8484
],
8585
)
86+
model_configuration_path = os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION", "")
87+
model_configuration_frequency = os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION_FREQUENCY", "0")
88+
8689
# for validation level 1, the default is a failure rate of 1%
8790
# set this environment variable if you would like to relax that threshold
8891
failure_rate_threshold = os.environ.get("FMS_TEST_SHAPES_FAILURE_THRESHOLD", 0.01)
@@ -145,15 +148,6 @@
145148
os.environ["VLLM_DT_MAX_CONTEXT_LEN"] = str((((max(common_seq_lengths) + max(common_max_new_tokens)) // 64) + 1) * 64)
146149
os.environ["VLLM_DT_MAX_BATCH_SIZE"] = str(max(common_batch_sizes))
147150

148-
common_shapes = list(
149-
itertools.product(
150-
common_model_paths,
151-
common_batch_sizes,
152-
common_seq_lengths,
153-
common_max_new_tokens,
154-
)
155-
)
156-
157151
# thresholds are chosen based on 1024 tokens per sequence
158152
# 1% error threshold rate between cpu fp32 and cuda fp16
159153
# if a models failure thresholds do not exist in this dict, default to the default_metrics_threshold defined above
@@ -180,6 +174,32 @@
180174
0.0044301633024588115,
181175
),
182176
}
177+
178+
if model_configuration_path != "":
179+
print("ignoring FMS_TEST_SHAPES_COMMON_MODEL_PATHS, FMS_TEST_SHAPES_USE_MICRO_MODELS as configuration will be set by FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION")
180+
USE_MICRO_MODELS = False
181+
common_model_paths = []
182+
frequency = int(model_configuration_frequency)
183+
with open(model_configuration_path, 'r') as f:
184+
for line in f:
185+
try:
186+
model_config = json.loads(line)
187+
if model_config["frequency"] <= frequency:
188+
common_model_paths.append(model_config["model_id"])
189+
# assume fullsize models
190+
fail_thresholds[(model_config["model_id"], USE_MICRO_MODELS)] = (model_config["ce"], model_config["mean_diff"])
191+
except json.JSONDecodeError:
192+
print(f"config contained an improper json line: {line.strip()}")
193+
194+
common_shapes = list(
195+
itertools.product(
196+
common_model_paths,
197+
common_batch_sizes,
198+
common_seq_lengths,
199+
common_max_new_tokens,
200+
)
201+
)
202+
183203
# custom weight adaptation to be used in future. For instance if we would like to add some other adaptation, we can register it with this custom adapter
184204
# and provide it when converting from an aiu fms model's weights to a cpu fms model's weights. Currently this is only done for gptq, but may be done for other
185205
# formats in the future
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"model_id": "mistralai/Mistral-7B-Instruct-v0.3", "ce": 2.8364005851745624, "mean_diff": 0.0007839603102183846, "frequency": 2}
2+
/* FIXME: use a proper mean_diff for this model; the current value is copied from granite 3.1 8b instruct */
3+
{"model_id": "ibm-granite/granite-guardian-3.1-8b", "ce": 2.493684446811673, "mean_diff": 0.0005767398688476533, "frequency": 1}

0 commit comments

Comments
 (0)