|
22 | 22 | sample_sharegpt_requests, |
23 | 23 | ids_for_prompt, |
24 | 24 | ) |
25 | | - |
| 25 | +import json |
26 | 26 | from aiu_fms_testing_utils.utils.aiu_setup import dprint, aiu_dist_setup |
27 | 27 |
|
28 | 28 | import os |
|
55 | 55 | "FMS_TEST_SHAPES_COMMON_MODEL_PATHS", |
56 | 56 | [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_20B_CODE_INSTRUCT_8K, LLAMA_3p1_70B_INSTRUCT], |
57 | 57 | ) |
# Optional path to a JSONL model-configuration file; when non-empty it takes
# precedence over FMS_TEST_SHAPES_COMMON_MODEL_PATHS / micro-model selection
# (the file is consumed further down in this module).
model_configuration_path = os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION", "")
# Maximum "priority" value (inclusive) a configured model may have to be selected.
# Kept as a str here; converted with int() at the point of use.
model_configuration_priority = os.environ.get("FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION_PRIORITY", "0")
| 60 | + |
# for validation level 1, the default is a failure rate of 1%
# set this environment variable if you would like to relax that threshold
# NOTE(review): when the env var is set this is a str while the default is the
# float 0.01 — presumably normalized downstream; confirm before numeric use.
failure_rate_threshold = os.environ.get("FMS_TEST_SHAPES_FAILURE_THRESHOLD", 0.01)
|
# The environment may supply max-new-tokens as a comma-separated string;
# normalize that form to a list of ints.
if isinstance(common_max_new_tokens, str):
    common_max_new_tokens = list(map(int, common_max_new_tokens.split(",")))
101 | 104 |
|
102 | | -common_shapes = list( |
103 | | - itertools.product( |
104 | | - common_model_paths, |
105 | | - common_batch_sizes, |
106 | | - common_seq_lengths, |
107 | | - common_max_new_tokens, |
108 | | - ) |
109 | | -) |
110 | | - |
111 | 105 | # thresholds are chosen based on 1024 tokens per sequence |
112 | 106 | # 1% error threshold rate between cpu fp32 and cuda fp16 |
113 | 107 | # if a models failure thresholds do not exist in this dict, default to the default_metrics_threshold defined above |
|
146 | 140 | 0.0044301633024588115, |
147 | 141 | ), |
148 | 142 | } |
| 143 | + |
# When a model-configuration file is provided, it fully determines which models
# are tested: each line of the file is a JSON object with "model_id",
# "priority", "ce", and "mean_diff" keys. Models whose priority is <= the
# requested priority are selected, and their failure thresholds are registered.
if model_configuration_path != "":
    print("ignoring FMS_TEST_SHAPES_COMMON_MODEL_PATHS, FMS_TEST_SHAPES_USE_MICRO_MODELS as configuration will be set by FMS_TEST_SHAPES_FROM_MODEL_CONFIGURATION")
    USE_MICRO_MODELS = False
    common_model_paths = []
    priority = int(model_configuration_priority)
    with open(model_configuration_path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip blank lines so a trailing newline doesn't emit a warning.
            if not line.strip():
                continue
            try:
                model_config = json.loads(line)
                if model_config["priority"] <= priority:
                    common_model_paths.append(model_config["model_id"])
                    # assume fullsize models
                    fail_thresholds[(model_config["model_id"], USE_MICRO_MODELS)] = (
                        model_config["ce"],
                        model_config["mean_diff"],
                    )
            except json.JSONDecodeError:
                print(f"config contained an improper json line: {line.strip()}")
            except KeyError as e:
                # A well-formed JSON line missing a required key previously
                # crashed test collection; report and skip it instead.
                print(f"config line missing required key {e}: {line.strip()}")
| 159 | + |
# One test shape per (model path, batch size, sequence length, max new tokens)
# combination — the full cross product of the configured axes.
common_shapes = [
    shape
    for shape in itertools.product(
        common_model_paths,
        common_batch_sizes,
        common_seq_lengths,
        common_max_new_tokens,
    )
]
| 168 | + |
149 | 169 | # custom weight adaptation to be used in future. For instance if we would like to add some other adaptation, we can register it with this custom adapter |
150 | 170 | # and provide it when converting from an aiu fms model's weights to a cpu fms model's weights. Currently this is only done for gptq, but may be done for other |
151 | 171 | # formats in the future |
|
0 commit comments