Commit a8180ee

[test] qwen3 moe w4a16 + skip
Summary: This test would ordinarily take too long, so we only quantize the first 10 layers.

Signed-off-by: HDCharles <charlesdavidhernandez@gmail.com>
1 parent db0b68d commit a8180ee

File tree

2 files changed: 29 additions, 0 deletions

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen3-30B-A3B
+dataset_id: HuggingFaceH4/ultrachat_200k
+dataset_split: train_sft
+scheme: W4A16_group
+num_calibration_samples: 20
+save_dir: "Qwen3-30B-A3B-W4A16-first-10"
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
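The W4A16_group scheme named in this config quantizes weights to 4-bit integers while keeping activations in 16-bit floats, with one floating-point scale per group of weights (the recipe below sets group_size: 128). A minimal sketch of symmetric per-group weight quantization, assuming a zero-point of 0 and an absmax-derived scale (the `quantize_group` helper is hypothetical, not llm-compressor's implementation):

```python
def quantize_group(weights, num_bits=4):
    """Toy symmetric per-group quantization: one fp scale per group, zero-point 0.

    Hypothetical sketch of the W4A16 weight path; real kernels operate on
    tensors grouped along the input dimension, not Python lists.
    """
    qmax = 2 ** (num_bits - 1) - 1                 # 7 for int4
    scale = max(abs(w) for w in weights) / qmax or 1.0   # avoid scale == 0
    # Round to the nearest int4 level, clamping to the signed range [-8, 7].
    q = [max(-qmax - 1, min(qmax, round(w / scale))) for w in weights]
    dequant = [qi * scale for qi in q]             # what the matmul effectively sees
    return q, scale, dequant

q, scale, deq = quantize_group([0.1, -0.5, 0.7])   # q == [1, -5, 7]
```

Smaller groups track local weight magnitudes more closely (lower quantization error) at the cost of storing more scales; 128 is a common trade-off for group schemes.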
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+quant_stage:
+  quant_modifiers:
+    GPTQModifier:
+      ignore: [
+        "lm_head",
+        # Ignore layers (10+)
+        "re:.*model\\.layers\\.([1-9][0-9])\\..*",
+      ]
+      actorder: null
+      config_groups:
+        group_0:
+          weights:
+            num_bits: 4
+            type: "int"
+            symmetric: True
+            strategy: "group"
+            group_size: 128
+            input_activations: null
+            output_activations: null
+          targets: ["Linear"]
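The only non-obvious line in the recipe is the ignore regex: `[1-9][0-9]` matches exactly two-digit layer indices (10-99), so layers 0-9 are quantized and everything from layer 10 up is skipped, which is enough for this model's depth. A quick check with standard Python `re` semantics (the module names below are illustrative, not taken from the model):

```python
import re

# Same pattern as the recipe, minus the "re:" prefix that marks it as a
# regex (rather than exact-name) ignore entry.
pattern = re.compile(r".*model\.layers\.([1-9][0-9])\..*")

names = [
    "model.layers.0.mlp.experts.0.gate_proj",   # quantized
    "model.layers.9.self_attn.q_proj",          # quantized (single digit)
    "model.layers.10.self_attn.q_proj",         # ignored
    "model.layers.47.mlp.experts.7.down_proj",  # ignored
]
ignored = [n for n in names if pattern.match(n)]
# ignored == names[2:]
```

Note that the trailing `\.` after the capture group is what prevents a single-digit layer like 9 from matching: `[1-9][0-9]` would need to consume the digit plus the following dot, which fails.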
