Skip to content

Commit 4adf53a

Browse files
authored
Merge pull request #48 from flaviabeo/mistral-test-expect
#40 Adds Mistral 7B expectations and metrics
2 parents bb03e87 + 55a0502 commit 4adf53a

3 files changed: 5 additions and 2 deletions

tests/models/test_model_expectations.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,9 +20,10 @@
2020
LLAMA_3p1_8B_INSTRUCT = "meta-llama/Llama-3.1-8B-Instruct"
2121
GRANITE_3p2_8B_INSTRUCT = "ibm-granite/granite-3.2-8b-instruct"
2222
GRANITE_GUARDIAN_3p1_8B = "ibm-granite/granite-guardian-3.1-8b"
23+
MISTRAL_7B_INSTRUCT = "mistralai/Mistral-7B-Instruct-v0.3"
2324
ROBERTA_SQUAD_v2 = "deepset/roberta-base-squad2"
2425

25-
micro_models = {LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_GUARDIAN_3p1_8B}
26+
micro_models = {LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_GUARDIAN_3p1_8B, MISTRAL_7B_INSTRUCT}
2627

2728

2829
class AIUModelFixtureMixin(ModelFixtureMixin):
@@ -53,7 +54,7 @@ def model(self, uninitialized_model):
5354
return uninitialized_model
5455

5556

56-
decoder_models = [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_GUARDIAN_3p1_8B]
57+
decoder_models = [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_GUARDIAN_3p1_8B, MISTRAL_7B_INSTRUCT]
5758

5859

5960
class TestAIUDecoderModels(
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
0.1015625,0.1015625,0.0859375,0.09765625,0.8671875,0.25,0.3671875,0.08984375,0.40234375,0.20703125,0.5546875,0.0234375,0.0,0.24609375,0.26171875,0.546875,0.16796875,0.328125,0.41015625,0.375,0.9140625,0.16796875,0.359375,0.26953125,0.33203125,0.48046875,0.3125,0.26953125,0.2890625,0.68359375,0.5234375,0.80078125,0.30078125,0.2109375,0.3203125,0.47265625,0.3984375,0.69921875,0.59375,0.359375,0.48046875,0.0390625,0.65625,0.4921875,0.6328125,0.68359375,0.76953125,0.34375,0.421875,0.234375,0.5078125,0.27734375,0.203125,0.16796875,0.42578125,0.30078125,0.37109375,0.18359375,0.125,0.19921875,0.29296875,0.15234375,0.15625,0.5859375
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
base_model.dec_norm.weight,base_model.embedding.weight,base_model.layers.0.attn.dense.weight,base_model.layers.0.attn.in_proj.key.weight,base_model.layers.0.attn.in_proj.query.weight,base_model.layers.0.attn.in_proj.value.weight,base_model.layers.0.ff_ln.weight,base_model.layers.0.ff_sub_layer.w1.weight,base_model.layers.0.ff_sub_layer.w2.weight,base_model.layers.0.ff_sub_layer.wg.weight,base_model.layers.0.ln.weight,base_model.layers.1.attn.dense.weight,base_model.layers.1.attn.in_proj.key.weight,base_model.layers.1.attn.in_proj.query.weight,base_model.layers.1.attn.in_proj.value.weight,base_model.layers.1.ff_ln.weight,base_model.layers.1.ff_sub_layer.w1.weight,base_model.layers.1.ff_sub_layer.w2.weight,base_model.layers.1.ff_sub_layer.wg.weight,base_model.layers.1.ln.weight,base_model.layers.2.attn.dense.weight,base_model.layers.2.attn.in_proj.key.weight,base_model.layers.2.attn.in_proj.query.weight,base_model.layers.2.attn.in_proj.value.weight,base_model.layers.2.ff_ln.weight,base_model.layers.2.ff_sub_layer.w1.weight,base_model.layers.2.ff_sub_layer.w2.weight,base_model.layers.2.ff_sub_layer.wg.weight,base_model.layers.2.ln.weight,head.weight

0 commit comments

Comments (0)