vllm-project
diff --git a/‎.buildkite/README.md‎
Lines changed: 9 additions & 6 deletions b/‎.buildkite/README.md‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎.buildkite/features/Collective_Communication_Matmul.yml‎
Lines changed: 24 additions & 0 deletions b/‎.buildkite/features/Collective_Communication_Matmul.yml‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎.buildkite/features/JAX-Path_Qxix_Quantization.yml‎
Lines changed: 0 additions & 42 deletions b/‎.buildkite/features/JAX-Path_Qxix_Quantization.yml‎
Lines changed: 0 additions & 42 deletions
diff --git a/‎.buildkite/features/MLA.yml‎
Lines changed: 45 additions & 0 deletions b/‎.buildkite/features/MLA.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.buildkite/features/MoE.yml‎
Lines changed: 45 additions & 0 deletions b/‎.buildkite/features/MoE.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.buildkite/features/Multimodal_Inputs.yml‎
Lines changed: 3 additions & 0 deletions b/‎.buildkite/features/Multimodal_Inputs.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.buildkite/features/Quantized_Attention.yml‎
Lines changed: 45 additions & 0 deletions b/‎.buildkite/features/Quantized_Attention.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.buildkite/features/Quantized_KV_Cache.yml‎
Lines changed: 45 additions & 0 deletions b/‎.buildkite/features/Quantized_KV_Cache.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.buildkite/features/Quantized_Matmul.yml‎
Lines changed: 45 additions & 0 deletions b/‎.buildkite/features/Quantized_Matmul.yml‎
Lines changed: 45 additions & 0 deletions
@@ -22,8 +22,9 @@ To support this requirement, each model and feature will go through a series of
 # Adding a new model to CI
 ## Adding a TPU-optimized model
 TPU-optimized models are models whose definition we rewrite ourselves, as opposed to using the model definition from vLLM upstream. These models go through benchmarking on top of unit and integration (accuracy) tests. To add a TPU-optimized model to CI, model owners can use the prepared [add_model_to_ci.py](pipeline_generation/add_model_to_ci.py) script. The script generates a Buildkite YAML config file in the `.buildkite/models` directory; config files under this directory are integrated into our pipeline automatically. The Python script takes the following arguments:
-- **model_name**: this is the **full name** of your model on Hugging Face. Please ensure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`) or else we won't be able to find your model.
-- **queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--model-name**: this is the **full name** of your model on Hugging Face. Please make sure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`), or else we won't be able to find your model.
+- **--queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--category**: this parameter sets the model category. Available options: "text-only" or "multimodal". (default: "text-only")
 
 ```bash
 python add_model_to_ci.py --model-name <MODEL_NAME> --queue <QUEUE_NAME>
@@ -36,8 +37,9 @@ In the generated yml file, there are three TODOs that will need your input:
 
 ## Adding a vLLM-native model
 vLLM-native models are models that use the model definition from vLLM upstream. These models do not go through benchmarking on our pipeline. To add a vLLM-native model to CI, model owners can use the prepared [add_model_to_ci.py](pipeline_generation/add_model_to_ci.py) script. The script generates a Buildkite YAML config file in the `.buildkite/models` directory; config files under this directory are integrated into our pipeline automatically. The Python script takes the following arguments, plus `--type vllm-native` as shown in the example command below:
-- **model_name**: this is the **full name** of your model on Hugging Face. Please ensure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`) or else we won't be able to find your model.
-- **queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--model-name**: this is the **full name** of your model on Hugging Face. Please make sure to use the **full name** (ex: `meta-llama/Llama-3.1-8B` instead of `Llama-3.1-8B`), or else we won't be able to find your model.
+- **--queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--category**: this parameter sets the model category. Available options: "text-only" or "multimodal". (default: "text-only")
 
 ```bash
 python add_model_to_ci.py --model-name <MODEL_NAME> --queue <QUEUE_NAME> --type vllm-native
@@ -49,8 +51,9 @@ In the generated yml file, there are two TODOs that will need your input:
 
 # Adding a new feature to CI
 To add a new feature to CI, feature owners can use the prepared [add_feature_to_ci.py](pipeline_generation/add_feature_to_ci.py) script. The script generates a Buildkite YAML config file in the `.buildkite/features` directory; config files under this directory are integrated into our pipeline automatically. The Python script takes the following arguments:
-- **feature_name**: this is the name of your feature
-- **queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--feature-name**: this is the name of your feature
+- **--queue**: this is the queue you want to run on (ex: `tpu_v6e_queue`)
+- **--category**: this parameter allows you to set the feature category, with the following options available: "feature support matrix" or "kernel support matrix". (default: "feature support matrix")
 
 ```bash
 python add_feature_to_ci.py --feature-name <FEATURE_NAME> --queue <QUEUE_NAME>
 
@@ -1,4 +1,5 @@
 # Collective Communication Matmul
+# kernel support matrix
 steps:
   - label: "Correctness tests for Collective Communication Matmul"
     key: "Collective_Communication_Matmul_CorrectnessTest"
@@ -13,8 +14,31 @@ steps:
     env:
       CI_TARGET: "Collective Communication Matmul"
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "kernel support matrix"
     agents:
       queue: cpu
     commands:
       - |
         .buildkite/scripts/record_step_result.sh Collective_Communication_Matmul_CorrectnessTest
+
+  - label: "Performance tests for Collective Communication Matmul"
+    key: "Collective_Communication_Matmul_PerformanceTest"
+    depends_on: "record_Collective_Communication_Matmul_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Collective_Communication_Matmul_PerformanceTest" "to be added"
+  - label: "Record performance test result for Collective Communication Matmul"
+    key: "record_Collective_Communication_Matmul_PerformanceTest"
+    depends_on: "Collective_Communication_Matmul_PerformanceTest"
+    env:
+      CI_TARGET: "Collective Communication Matmul"
+      CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Collective_Communication_Matmul_PerformanceTest
@@ -0,0 +1,45 @@
+# MLA
+# feature support matrix
+steps:
+  - label: "Correctness tests for MLA"
+    key: "MLA_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "MLA_CorrectnessTest" "to be added"
+  - label: "Record correctness test result for MLA"
+    key: "record_MLA_CorrectnessTest"
+    depends_on: "MLA_CorrectnessTest"
+    env:
+      CI_TARGET: "MLA"
+      CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh MLA_CorrectnessTest
+
+  - label: "Performance tests for MLA"
+    key: "MLA_PerformanceTest"
+    depends_on: "record_MLA_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "MLA_PerformanceTest" "to be added"
+  - label: "Record performance test result for MLA"
+    key: "record_MLA_PerformanceTest"
+    depends_on: "MLA_PerformanceTest"
+    env:
+      CI_TARGET: "MLA"
+      CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "feature support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh MLA_PerformanceTest
@@ -0,0 +1,45 @@
+# MoE
+# feature support matrix
+steps:
+  - label: "Correctness tests for MoE"
+    key: "MoE_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "MoE_CorrectnessTest" "to be added"
+  - label: "Record correctness test result for MoE"
+    key: "record_MoE_CorrectnessTest"
+    depends_on: "MoE_CorrectnessTest"
+    env:
+      CI_TARGET: "MoE"
+      CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh MoE_CorrectnessTest
+
+  - label: "Performance tests for MoE"
+    key: "MoE_PerformanceTest"
+    depends_on: "record_MoE_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "MoE_PerformanceTest" "to be added"
+  - label: "Record performance test result for MoE"
+    key: "record_MoE_PerformanceTest"
+    depends_on: "MoE_PerformanceTest"
+    env:
+      CI_TARGET: "MoE"
+      CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "feature support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh MoE_PerformanceTest
@@ -1,4 +1,5 @@
 # Multimodal Inputs
+# feature support matrix
 steps:
   - label: "Correctness tests for Multimodal Inputs"
     key: "Multimodal_Inputs_CorrectnessTest"
@@ -13,6 +14,7 @@ steps:
     env:
       CI_TARGET: Multimodal Inputs
       CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
@@ -33,6 +35,7 @@ steps:
     env:
       CI_TARGET: Multimodal Inputs
       CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "feature support matrix"
     agents:
       queue: cpu
     commands:
 
@@ -0,0 +1,45 @@
+# Quantized Attention
+# kernel support matrix
+steps:
+  - label: "Correctness tests for Quantized Attention"
+    key: "Quantized_Attention_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_Attention_CorrectnessTest" "to be added"
+  - label: "Record correctness test result for Quantized Attention"
+    key: "record_Quantized_Attention_CorrectnessTest"
+    depends_on: "Quantized_Attention_CorrectnessTest"
+    env:
+      CI_TARGET: "Quantized Attention"
+      CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_Attention_CorrectnessTest
+
+  - label: "Performance tests for Quantized Attention"
+    key: "Quantized_Attention_PerformanceTest"
+    depends_on: "record_Quantized_Attention_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_Attention_PerformanceTest" "to be added"
+  - label: "Record performance test result for Quantized Attention"
+    key: "record_Quantized_Attention_PerformanceTest"
+    depends_on: "Quantized_Attention_PerformanceTest"
+    env:
+      CI_TARGET: "Quantized Attention"
+      CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_Attention_PerformanceTest
@@ -0,0 +1,45 @@
+# Quantized KV Cache
+# kernel support matrix
+steps:
+  - label: "Correctness tests for Quantized KV Cache"
+    key: "Quantized_KV_Cache_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_KV_Cache_CorrectnessTest" "to be added"
+  - label: "Record correctness test result for Quantized KV Cache"
+    key: "record_Quantized_KV_Cache_CorrectnessTest"
+    depends_on: "Quantized_KV_Cache_CorrectnessTest"
+    env:
+      CI_TARGET: "Quantized KV Cache"
+      CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_KV_Cache_CorrectnessTest
+
+  - label: "Performance tests for Quantized KV Cache"
+    key: "Quantized_KV_Cache_PerformanceTest"
+    depends_on: "record_Quantized_KV_Cache_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_KV_Cache_PerformanceTest" "to be added"
+  - label: "Record performance test result for Quantized KV Cache"
+    key: "record_Quantized_KV_Cache_PerformanceTest"
+    depends_on: "Quantized_KV_Cache_PerformanceTest"
+    env:
+      CI_TARGET: "Quantized KV Cache"
+      CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_KV_Cache_PerformanceTest
@@ -0,0 +1,45 @@
+# Quantized Matmul
+# kernel support matrix
+steps:
+  - label: "Correctness tests for Quantized Matmul"
+    key: "Quantized_Matmul_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_Matmul_CorrectnessTest" "to be added"
+  - label: "Record correctness test result for Quantized Matmul"
+    key: "record_Quantized_Matmul_CorrectnessTest"
+    depends_on: "Quantized_Matmul_CorrectnessTest"
+    env:
+      CI_TARGET: "Quantized Matmul"
+      CI_STAGE: "CorrectnessTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_Matmul_CorrectnessTest
+
+  - label: "Performance tests for Quantized Matmul"
+    key: "Quantized_Matmul_PerformanceTest"
+    depends_on: "record_Quantized_Matmul_CorrectnessTest"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        buildkite-agent meta-data set "Quantized_Matmul_PerformanceTest" "to be added"
+  - label: "Record performance test result for Quantized Matmul"
+    key: "record_Quantized_Matmul_PerformanceTest"
+    depends_on: "Quantized_Matmul_PerformanceTest"
+    env:
+      CI_TARGET: "Quantized Matmul"
+      CI_STAGE: "PerformanceTest"
+      CI_CATEGORY: "kernel support matrix"
+    agents:
+      queue: cpu
+    commands:
+      - |
+        .buildkite/scripts/record_step_result.sh Quantized_Matmul_PerformanceTest