@@ -42,6 +42,7 @@
 GRANITE_3p3_8B_INSTRUCT = "ibm-granite/granite-3.3-8b-instruct"
 GRANITE_20B_CODE_INSTRUCT_8K = "ibm-granite/granite-20b-code-instruct-8k"
 LLAMA_3p1_70B_INSTRUCT = "meta-llama/Llama-3.1-70B-Instruct"
+MISTRAL_0p3_7B_INSTRUCT = "mistralai/Mistral-7B-Instruct-v0.3"
 
 micro_model_mapping = {
     LLAMA_3p1_8B_INSTRUCT: os.path.join(MICRO_MODELS_HOME, "llama-3.1-8b-layers-3-step-24000"),
@@ -72,6 +73,7 @@
         GRANITE_3p3_8B_INSTRUCT,
         GRANITE_20B_CODE_INSTRUCT_8K,
         LLAMA_3p1_70B_INSTRUCT,
+        MISTRAL_0p3_7B_INSTRUCT
     ],
 )
 # for validation level 1, the default is a failure rate of 1%
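As a reading aid for the 1% default mentioned in the comment above: a minimal sketch of how a level-1 validation gate can apply a token-level failure rate. The function name and signature are illustrative assumptions, not this repository's actual API.

# Illustrative sketch only: the name and signature below are assumed,
# not taken from this repository. Level-1 validation tolerates a small
# fraction of mismatching tokens; by default up to 1% may fail.
def passes_level_1_validation(
    num_failed_tokens: int,
    num_total_tokens: int,
    failure_rate: float = 0.01,  # the 1% default noted above
) -> bool:
    return (num_failed_tokens / num_total_tokens) <= failure_rate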
@@ -145,25 +147,34 @@
 # threshold key is (model_id, is_tiny_model)
 fail_thresholds = {
     (LLAMA_3p1_8B_INSTRUCT, False): (
-        2.6994638133048965,
-        0.00047589250549208347,
+        2.7080255031585696,
+        0.0004068055667448795,
     ),
     (GRANITE_3p2_8B_INSTRUCT, False): (
         2.3919514417648315,
         0.0005767398688476533,
     ),
+    (GRANITE_3p2_8B_INSTRUCT, True): (
+        2.7449850964546205,
+        0.00018840670207282534,
+    ),
     (GRANITE_3p3_8B_INSTRUCT, False): (
         2.4444521379470827,
         0.0004970188625156878,
     ),
     (GRANITE_20B_CODE_INSTRUCT_8K, False): (
-        2.640706129074097,
-        0.00034344267623964697,
+        2.646075320243838,
+        0.0003458251833217223,
     ),
+    # TODO: run llama 70B with 1,2,4,8 batches
     (LLAMA_3p1_70B_INSTRUCT, False): (
         2.841279556751251,
         0.0044301633024588115,
     ),
+    (MISTRAL_0p3_7B_INSTRUCT, False): (
+        2.846206340789795,
+        0.0008768103783950205,
+    ),
 }
 # custom weight adaptation to be used in future. For instance if we would like to add some other adaptation, we can register it with this custom adapter
 # and provide it when converting from an aiu fms model's weights to a cpu fms model's weights. Currently this is only done for gptq, but may be done for other
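Each fail_thresholds entry above pairs a (model_id, is_tiny_model) key with two bounds. A minimal sketch of how such a table might be consulted follows; reading the first value as a cross-entropy bound and the second as a mean-difference bound is an assumption inferred from their magnitudes, as are all names in the sketch.

# Illustrative sketch only: the (cross-entropy, mean-diff) reading and
# every name here are assumptions, not this repository's actual code.
# fail_thresholds is the dict defined in the hunk above.
def within_thresholds(
    model_id: str,
    is_tiny_model: bool,
    observed_ce: float,
    observed_mean_diff: float,
) -> bool:
    ce_bound, mean_diff_bound = fail_thresholds[(model_id, is_tiny_model)]
    return observed_ce <= ce_bound and observed_mean_diff <= mean_diff_bound

Under this reading, the new (GRANITE_3p2_8B_INSTRUCT, True) entry simply gives the tiny (micro) variant of that model its own pair of bounds rather than reusing the full model's.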
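The closing comment describes a registration pattern: a named weight adaptation (currently only gptq) is registered once and then supplied when converting an AIU FMS model's weights to a CPU FMS model's weights. A generic sketch of that pattern, with entirely hypothetical names:

# Generic registry-pattern sketch: every name below is hypothetical and
# not taken from this codebase; only the pattern mirrors the comment above.
WEIGHT_ADAPTERS = {}

def register_weight_adapter(name, adapt_fn):
    # e.g. register_weight_adapter("gptq", gptq_adapt), per the comment
    WEIGHT_ADAPTERS[name] = adapt_fn

def aiu_to_cpu_weights(name, aiu_state_dict):
    # apply the registered adaptation while converting weights
    return WEIGHT_ADAPTERS[name](aiu_state_dict)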