vllm-project
diff --git a/‎.buildkite/README.md‎
Lines changed: 9 additions & 6 deletions b/‎.buildkite/README.md‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎.buildkite/features/Collective_Communication_Matmul.yml‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/features/Collective_Communication_Matmul.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/features/JAX-Path_Qxix_Quantization.yml‎
Lines changed: 0 additions & 42 deletions b/‎.buildkite/features/JAX-Path_Qxix_Quantization.yml‎
Lines changed: 0 additions & 42 deletions
diff --git a/‎.buildkite/features/Multimodal_Inputs.yml‎
Lines changed: 3 additions & 0 deletions b/‎.buildkite/features/Multimodal_Inputs.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.buildkite/features/Quantized_Matmul_Attention_and_KV_Cache.yml‎
Lines changed: 23 additions & 19 deletions b/‎.buildkite/features/Quantized_Matmul_Attention_and_KV_Cache.yml‎
Lines changed: 23 additions & 19 deletions
diff --git a/‎.buildkite/features/Speculative_Decoding-_Ngram.yml‎
Lines changed: 3 additions & 0 deletions b/‎.buildkite/features/Speculative_Decoding-_Ngram.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.buildkite/features/Structured_Decoding.yml‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/features/Structured_Decoding.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/features/async_scheduler.yml‎
Lines changed: 3 additions & 0 deletions b/‎.buildkite/features/async_scheduler.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.buildkite/features/default_features.txt‎
Lines changed: 4 additions & 4 deletions b/‎.buildkite/features/default_features.txt‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎.buildkite/models/Qwen_Qwen2_5-VL-7B-Instruct.yml‎
Lines changed: 5 additions & 1 deletion b/‎.buildkite/models/Qwen_Qwen2_5-VL-7B-Instruct.yml‎
Lines changed: 5 additions & 1 deletion
@@ -22,8 +22,9 @@ To support this requirement, each model and feature will go through a series of
 # Adding a new model to CI
 ## Adding a TPU-optimized model
 TPU-optimized models are models whose definition we rewrite instead of using the model definition from upstream vLLM. These models go through benchmarking in addition to unit and integration (accuracy) tests. To add a TPU-optimized model to CI, model owners can use the prepared [add_model_to_ci.py](pipeline_generation/add_model_to_ci.py) script. The script generates a Buildkite YAML config file in the `.buildkite/models` directory; config files under this directory are integrated into our pipeline automatically. The Python script takes the following arguments:
-- **model_name**: this is the **full name** of your model on Hugging Face. Please ensure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`) or else we won't be able to find your model.
-- **queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--model-name**: this is the **full name** of your model on Hugging Face. Please ensure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`) or else we won't be able to find your model.
+- **--queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--category**: this is the category of your model; the available options are "text-only" and "multimodel".
 
 ```bash
 python add_model_to_ci.py --model-name <MODEL_NAME> --queue <QUEUE_NAME>
@@ -36,8 +37,9 @@ In the generated yml file, there are three TODOs that will need your input:
 
 ## Adding a vLLM-native model
 vLLM-native models are models that use the model definition from upstream vLLM. These models do not go through benchmarking on our pipeline. To add a vLLM-native model to CI, model owners can use the prepared [add_model_to_ci.py](pipeline_generation/add_model_to_ci.py) script. The script generates a Buildkite YAML config file in the `.buildkite/models` directory; config files under this directory are integrated into our pipeline automatically. In addition to `--type vllm-native` (shown in the command below), the Python script takes the following arguments:
-- **model_name**: this is the **full name** of your model on Hugging Face. Please ensure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`) or else we won't be able to find your model.
-- **queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--model-name**: this is the **full name** of your model on Hugging Face. Please ensure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`) or else we won't be able to find your model.
+- **--queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--category**: this is the category of your model; the available options are "text-only" and "multimodel".
 
 ```bash
 python add_model_to_ci.py --model-name <MODEL_NAME> --queue <QUEUE_NAME> --type vllm-native
@@ -49,8 +51,9 @@ In the generated yml file, there are two TODOs that will need your input:
 
 # Adding a new feature to CI
 To add a new feature to CI, feature owners can use the prepared [add_feature_to_ci.py](pipeline_generation/add_feature_to_ci.py) script. The script generates a Buildkite YAML config file in the `.buildkite/features` directory; config files under this directory are integrated into our pipeline automatically. The Python script takes the following arguments:
-- **feature_name**: this is the name of your feature
-- **queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--feature-name**: this is the name of your feature
+- **--queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--category**: this is the category of your feature; the available options are "feature support matrix" and "kernel support matrix".
 
 ```bash
 python add_feature_to_ci.py --feature-name <FEATURE_NAME> --queue <QUEUE_NAME>
 
@@ -1,4 +1,5 @@
 # Collective Communication Matmul
+# kernel support matrix
 steps:
   - label: "Correctness tests for Collective Communication Matmul"
     key: "Collective_Communication_Matmul_CorrectnessTest"
@@ -13,6 +14,7 @@ steps:
     env:
       CI_TARGET: "Collective Communication Matmul"
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "kernel support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -1,4 +1,5 @@
 # Multimodal Inputs
+# feature support matrix
 steps:
   - label: "Correctness tests for Multimodal Inputs"
     key: "Multimodal_Inputs_CorrectnessTest"
@@ -13,6 +14,7 @@ steps:
     env:
       CI_TARGET: Multimodal Inputs
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
@@ -33,6 +35,7 @@ steps:
     env:
       CI_TARGET: Multimodal Inputs
       CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -1,27 +1,30 @@
 # Quantized Matmul Attention and KV Cache
+# kernel support matrix
 steps:
-  # - label: "Correctness tests for Quantized Matmul Attention and KV Cache"
-  #   key: "Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
-  #   soft_fail: true
-  #   agents:
-  #     queue: cpu
-  #   commands:
-  #     - echo "covered by performance test"
-  # - label: "Record correctness test result for Quantized Matmul Attention and KV Cache"
-  #   key: "record_Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
-  #   depends_on: "Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
-  #   env:
-  #     CI_TARGET: "Quantized Matmul Attention and KV Cache"
-  #     CI_STAGE: "CorrectnessTest"
-  #   agents:
-  #     queue: cpu
-  #   commands:
-  #     - |
-  #       .buildkite/scripts/record_step_result.sh Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest
+  - label: "Correctness tests for Quantized Matmul Attention and KV Cache"
+    key: "Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: cpu
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest" "to be added"
+  - label: "Record correctness test result for Quantized Matmul Attention and KV Cache"
+    key: "record_Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
+    depends_on: "Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
+    env:
+      CI_TARGET: "Quantized Matmul Attention and KV Cache"
+      CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest
 
   - label: "Performance tests for Quantized Matmul Attention and KV Cache"
     key: "Quantized_Matmul_Attention_and_KV_Cache_PerformanceTest"
-    # depends_on: "record_Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
+    depends_on: "record_Quantized_Matmul_Attention_and_KV_Cache_CorrectnessTest"
     soft_fail: true
     agents:
       queue: tpu_v6e_8_queue
@@ -44,6 +47,7 @@ steps:
     env:
       CI_TARGET: "Quantized Matmul Attention and KV Cache"
       CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "kernel support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -1,4 +1,5 @@
 # Speculative Decoding: Ngram
+# feature support matrix
 steps:
   - label: "Correctness tests for Speculative Decoding: Ngram"
     key: "Speculative_Decoding-_Ngram_CorrectnessTest"
@@ -17,6 +18,7 @@ steps:
     env:
       CI_TARGET: "Speculative Decoding: Ngram"
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
@@ -42,6 +44,7 @@ steps:
     env:
       CI_TARGET: "Speculative Decoding: Ngram"
       CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -1,4 +1,5 @@
 # Structured Decoding
+# feature support matrix
 steps:
   - label: "Correctness tests for Structured Decoding"
     key: "Structured_Decoding_CorrectnessTest"
@@ -13,6 +14,7 @@ steps:
     env:
       CI_TARGET: Structured Decoding
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -1,4 +1,5 @@
 # async scheduler
+# feature support matrix
 steps:
   - label: "Correctness tests for async scheduler"
     key: "async_scheduler_CorrectnessTest"
@@ -13,6 +14,7 @@ steps:
     env:
       CI_TARGET: "async scheduler"
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
@@ -33,6 +35,7 @@ steps:
     env:
       CI_TARGET: "async scheduler"
       CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -1,4 +1,4 @@
-Chunked Prefill
-Prefix Caching
-Ragged Paged Attention V3
-Single Program Multi Data
+Chunked Prefill (feature support matrix)
+Prefix Caching (feature support matrix)
+Ragged Paged Attention V3 (kernel support matrix)
+Single Program Multi Data (feature support matrix)
@@ -1,4 +1,5 @@
 # Qwen/Qwen2.5-VL-7B-Instruct
+# multimodel
 steps:
   - label: "Unit tests for Qwen/Qwen2.5-VL-7B-Instruct"
     key: "Qwen_Qwen2_5-VL-7B-Instruct_UnitTest"
@@ -13,8 +14,9 @@ steps:
     key: "record_Qwen_Qwen2_5-VL-7B-Instruct_UnitTest"
     depends_on: "Qwen_Qwen2_5-VL-7B-Instruct_UnitTest"
     env:
-      CI_STAGE: "UnitTest"
       CI_TARGET: Qwen/Qwen2.5-VL-7B-Instruct
+      CI_STAGE: "UnitTest"
+      CI_CATEGORY: "multimodel"
     agents:
       queue: cpu
     commands:
@@ -40,6 +42,7 @@ steps:
     env:
       CI_TARGET: Qwen/Qwen2.5-VL-7B-Instruct
       CI_STAGE: "IntegrationTest"
+      CI_CATEGORY: "multimodel"
     agents:
       queue: cpu
     commands:
@@ -61,6 +64,7 @@ steps:
     env:
       CI_TARGET: Qwen/Qwen2.5-VL-7B-Instruct
       CI_STAGE: "Benchmark"
+      CI_CATEGORY: "multimodel"
     agents:
       queue: cpu
     commands: