|
22 | 22 | sample_sharegpt_requests, |
23 | 23 | ids_for_prompt, |
24 | 24 | ) |
25 | | - |
| 25 | +import json |
26 | 26 | from aiu_fms_testing_utils.utils.aiu_setup import dprint, aiu_dist_setup |
27 | 27 |
|
28 | 28 | import os |
|
83 | 83 | LLAMA_3p1_70B_INSTRUCT, |
84 | 84 | ], |
85 | 85 | ) |
# Optional model-configuration override: a path to a JSONL file describing the
# models under test, plus a frequency cutoff used to filter its entries.
# Both default to "off" ("" / "0") when the variables are unset.
model_configuration_path = os.getenv("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION", "")
model_configuration_frequency = os.getenv("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION_FREQUENCY", "0")
# for validation level 1, the default is a failure rate of 1%
# set this environment variable if you would like to relax that threshold
# cast to float: os.environ.get returns a str when the variable is set, while
# the default is a float — normalizing keeps downstream numeric comparisons safe
failure_rate_threshold = float(os.environ.get("FMS_TEST_SHAPES_FAILURE_THRESHOLD", 0.01))
|
# Size the context-length budget from the longest prompt plus the longest
# generation, then bump it to the next 64 boundary (always adds at least one
# extra 64-token block) — presumably an AIU/vLLM alignment requirement; confirm.
_longest_request = max(common_seq_lengths) + max(common_max_new_tokens)
os.environ["VLLM_DT_MAX_CONTEXT_LEN"] = str((_longest_request // 64 + 1) * 64)
os.environ["VLLM_DT_MAX_BATCH_SIZE"] = str(max(common_batch_sizes))
147 | 150 |
|
148 | | -common_shapes = list( |
149 | | - itertools.product( |
150 | | - common_model_paths, |
151 | | - common_batch_sizes, |
152 | | - common_seq_lengths, |
153 | | - common_max_new_tokens, |
154 | | - ) |
155 | | -) |
156 | | - |
157 | 151 | # thresholds are chosen based on 1024 tokens per sequence |
158 | 152 | # 1% error threshold rate between cpu fp32 and cuda fp16 |
159 | 153 | # if a models failure thresholds do not exist in this dict, default to the default_metrics_threshold defined above |
|
180 | 174 | 0.0044301633024588115, |
181 | 175 | ), |
182 | 176 | } |

# When a model-configuration file is supplied, it takes over model selection:
# each JSONL line is expected to carry "model_id", "frequency", "ce" and
# "mean_diff"; entries at or below the requested frequency are enabled and
# their failure thresholds registered.
if model_configuration_path != "":
    print("ignoring FMS_TEST_SHAPES_COMMON_MODEL_PATHS, FMS_TEST_SHAPES_USE_MICRO_MODELS as configuration will be set by FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION")
    USE_MICRO_MODELS = False
    common_model_paths = []
    frequency = int(model_configuration_frequency)
    # JSON is UTF-8 by spec, so read with an explicit encoding rather than the
    # platform default
    with open(model_configuration_path, "r", encoding="utf-8") as f:
        for line in f:
            # skip blank/whitespace-only lines: json.loads("") raises
            # JSONDecodeError, which previously logged a spurious
            # "improper json line" warning for a trailing newline
            if not line.strip():
                continue
            try:
                model_config = json.loads(line)
                if model_config["frequency"] <= frequency:
                    common_model_paths.append(model_config["model_id"])
                    # assume fullsize models
                    fail_thresholds[(model_config["model_id"], USE_MICRO_MODELS)] = (
                        model_config["ce"],
                        model_config["mean_diff"],
                    )
            except json.JSONDecodeError:
                print(f"config contained an improper json line: {line.strip()}")

# Every (model_path, batch_size, seq_length, max_new_tokens) combination under
# test — the full cartesian product of the configured axes.
_shape_axes = (
    common_model_paths,
    common_batch_sizes,
    common_seq_lengths,
    common_max_new_tokens,
)
common_shapes = list(itertools.product(*_shape_axes))

183 | 203 | # custom weight adaptation to be used in future. For instance if we would like to add some other adaptation, we can register it with this custom adapter |
184 | 204 | # and provide it when converting from an aiu fms model's weights to a cpu fms model's weights. Currently this is only done for gptq, but may be done for other |
185 | 205 | # formats in the future |
|
0 commit comments