From f0444c423df4fadad6bdc3e6ada5933b7e321327 Mon Sep 17 00:00:00 2001 From: zhewenli Date: Wed, 5 Nov 2025 11:56:28 -0800 Subject: [PATCH 1/6] update Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 2 +- pyproject.toml | 1 + tests/conftest.py | 42 +++++++++++++++---- .../openai/test_transcription_validation.py | 9 ++++ .../openai/test_translation_validation.py | 5 +++ tests/entrypoints/openai/test_vision.py | 11 +++++ .../pooling/correctness/test_mteb_score.py | 2 + .../entrypoints/pooling/llm/test_embedding.py | 2 + tests/entrypoints/pooling/llm/test_encode.py | 2 + .../pooling/openai/test_embedding.py | 2 + .../openai/test_embedding_long_text.py | 2 + .../entrypoints/pooling/openai/test_rerank.py | 2 + .../pooling/openai/test_vision_embedding.py | 2 + .../models/language/pooling/test_embedding.py | 16 +++++-- 14 files changed, 89 insertions(+), 11 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index bb5ef5d62463..f0493a1afd26 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -727,7 +727,7 @@ steps: - label: Language Models Tests (Standard) timeout_in_minutes: 25 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking torch_nightly: true diff --git a/pyproject.toml b/pyproject.toml index 29ee7f75f070..8f35054a0799 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,6 +107,7 @@ markers = [ "distributed: run this test only in distributed GPU tests", "skip_v1: do not run this test with v1", "optional: optional tests that are automatically skipped, include --optional to run them", + "encoder_decoder: tests that use encoder-decoder models, skipped on ROCm", ] [tool.ty.src] diff --git a/tests/conftest.py b/tests/conftest.py index 5e127e4e939e..4a79041ecdf1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1250,13 +1250,41 @@ def pytest_addoption(parser): def pytest_collection_modifyitems(config, items): - if config.getoption("--optional"): - # --optional given in cli: do not skip optional tests - return - skip_optional = pytest.mark.skip(reason="need --optional option to run") - for item in items: - if "optional" in item.keywords: - item.add_marker(skip_optional) + if not config.getoption("--optional"): + skip_optional = pytest.mark.skip(reason="need --optional option to run") + for item in items: + if "optional" in item.keywords: + item.add_marker(skip_optional) + + from vllm.platforms import current_platform + + # Skip encoder-decoder models on ROCm: https://github.com/vllm-project/vllm/issues/27442 + ENCODER_DECODER_MODELS = [ + # Encoder-decoder models + "openai/whisper-small", + "openai/whisper-large-v3-turbo", + "mistralai/Voxtral-Mini-3B-2507", + "microsoft/Phi-3.5-vision-instruct", + # Encoder-only models (cross-encoders, embedding models) + "cross-encoder/ms-marco-MiniLM-L-6-v2", + "intfloat/multilingual-e5-small", + "BAAI/bge-reranker-base", + "BAAI/bge-base-en-v1.5", + "TIGER-Lab/VLM2Vec-Full", + "sentence-transformers/all-MiniLM-L12-v2", + "sentence-transformers/stsb-roberta-base-v2", + ] + + if current_platform.is_rocm(): + skip_encoder_decoder = pytest.mark.skip( + reason="Encoder-decoder models not supported on ROCm (all ROCm attention backends only support decoder-only models)" + ) + for item in items: + if "encoder_decoder" in item.keywords: + for encoder_model in ENCODER_DECODER_MODELS: + if encoder_model in item.nodeid: + item.add_marker(skip_encoder_decoder) + break @pytest.fixture(scope="session") diff --git 
a/tests/entrypoints/openai/test_transcription_validation.py b/tests/entrypoints/openai/test_transcription_validation.py index 88580ed899f1..dc28eae3c87f 100644 --- a/tests/entrypoints/openai/test_transcription_validation.py +++ b/tests/entrypoints/openai/test_transcription_validation.py @@ -39,6 +39,7 @@ async def client(server): @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize( "model_name", ["openai/whisper-large-v3-turbo", "mistralai/Voxtral-Mini-3B-2507"] ) @@ -66,6 +67,7 @@ async def test_basic_audio(mary_had_lamb, model_name): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_basic_audio_with_lora(mary_had_lamb): """Ensure STT (transcribe) requests can pass LoRA through to generate.""" model_name = "ibm-granite/granite-speech-3.3-2b" @@ -137,6 +139,7 @@ async def test_non_asr_model(winning_call): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_bad_requests(mary_had_lamb, client): # invalid language with pytest.raises(openai.BadRequestError): @@ -146,6 +149,7 @@ async def test_bad_requests(mary_had_lamb, client): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_long_audio_request(mary_had_lamb, client): mary_had_lamb.seek(0) audio, sr = librosa.load(mary_had_lamb) @@ -172,6 +176,7 @@ async def test_long_audio_request(mary_had_lamb, client): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_completion_endpoints(client): # text to text model res = await client.chat.completions.create( @@ -189,6 +194,7 @@ async def test_completion_endpoints(client): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_streaming_response(winning_call, client): transcription = "" res_no_stream = await client.audio.transcriptions.create( @@ -215,6 +221,7 @@ async def test_streaming_response(winning_call, client): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_stream_options(winning_call, client): res = await client.audio.transcriptions.create( model=MODEL_NAME, @@ -237,6 +244,7 @@ async def test_stream_options(winning_call, client): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_sampling_params(mary_had_lamb, client): """ Compare sampling with params and greedy sampling to assert results @@ -270,6 +278,7 @@ async def test_sampling_params(mary_had_lamb, client): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_audio_prompt(mary_had_lamb, client): prompt = "This is a speech, recorded in a phonograph." # Prompts should not omit the part of original prompt while transcribing. diff --git a/tests/entrypoints/openai/test_translation_validation.py b/tests/entrypoints/openai/test_translation_validation.py index c060ee2b1922..16e6c142534a 100644 --- a/tests/entrypoints/openai/test_translation_validation.py +++ b/tests/entrypoints/openai/test_translation_validation.py @@ -84,6 +84,7 @@ async def test_basic_audio_with_lora(mary_had_lamb): # NOTE: (NickLucche) the large-v3-turbo model was not trained on translation! 
@pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_basic_audio(foscolo, client_and_model): client, model_name = client_and_model translation = await client.audio.translations.create( @@ -99,6 +100,7 @@ async def test_basic_audio(foscolo, client_and_model): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_audio_prompt(foscolo, client_and_model): client, model_name = client_and_model # Condition whisper on starting text @@ -117,6 +119,7 @@ async def test_audio_prompt(foscolo, client_and_model): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_streaming_response(foscolo, client_and_model, server): client, model_name = client_and_model translation = "" @@ -168,6 +171,7 @@ async def test_streaming_response(foscolo, client_and_model, server): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_stream_options(foscolo, server): server, model_name = server url = server.url_for("v1/audio/translations") @@ -207,6 +211,7 @@ async def test_stream_options(foscolo, server): @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_long_audio_request(foscolo, client_and_model): client, model_name = client_and_model if model_name == "google/gemma-3n-E2B-it": diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index 2a7df08ea3b0..0022739a2621 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -122,6 +122,7 @@ def get_hf_prompt_tokens(model_name, content, image_url): @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_single_chat_session_image( @@ -169,6 +170,7 @@ async def test_single_chat_session_image( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_error_on_invalid_image_url_type( @@ -196,6 +198,7 @@ async def test_error_on_invalid_image_url_type( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_single_chat_session_image_beamsearch( @@ -221,6 +224,7 @@ async def test_single_chat_session_image_beamsearch( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("raw_image_url", TEST_IMAGE_ASSETS) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) @@ -277,6 +281,7 @@ async def test_single_chat_session_image_base64encoded( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_idx", list(range(len(TEST_IMAGE_ASSETS)))) async def test_single_chat_session_image_base64encoded_beamsearch( @@ -307,6 +312,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_chat_streaming_image( @@ -350,6 +356,7 @@ async def test_chat_streaming_image( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", @@ -391,6 +398,7 @@ async def test_multi_image_input( @pytest.mark.asyncio 
+@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", @@ -430,6 +438,7 @@ async def test_completions_with_image( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", @@ -493,6 +502,7 @@ async def test_completions_with_image_with_uuid( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_completions_with_empty_image_with_uuid_without_cache_hit( client: openai.AsyncOpenAI, @@ -522,6 +532,7 @@ async def test_completions_with_empty_image_with_uuid_without_cache_hit( @pytest.mark.asyncio +@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", diff --git a/tests/entrypoints/pooling/correctness/test_mteb_score.py b/tests/entrypoints/pooling/correctness/test_mteb_score.py index 1afe68b189db..50d73f080e65 100644 --- a/tests/entrypoints/pooling/correctness/test_mteb_score.py +++ b/tests/entrypoints/pooling/correctness/test_mteb_score.py @@ -28,6 +28,7 @@ def server(): yield remote_server +@pytest.mark.encoder_decoder def test_mteb_score(server): url = server.url_for("score") encoder = ScoreClientMtebEncoder(MODEL_NAME, url) @@ -42,6 +43,7 @@ def test_mteb_score(server): assert st_main_score - vllm_main_score < MTEB_RERANK_TOL +@pytest.mark.encoder_decoder def test_mteb_rerank(server): url = server.url_for("rerank") encoder = RerankClientMtebEncoder(MODEL_NAME, url) diff --git a/tests/entrypoints/pooling/llm/test_embedding.py b/tests/entrypoints/pooling/llm/test_embedding.py index 5455b5f91fc0..5c55f9d3b789 100644 --- a/tests/entrypoints/pooling/llm/test_embedding.py +++ b/tests/entrypoints/pooling/llm/test_embedding.py @@ -10,6 +10,8 @@ from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory +pytestmark = pytest.mark.encoder_decoder + MODEL_NAME = "intfloat/multilingual-e5-small" prompts = ["The chef prepared a delicious meal."] diff --git a/tests/entrypoints/pooling/llm/test_encode.py b/tests/entrypoints/pooling/llm/test_encode.py index ca85d2758fce..dd98857f6025 100644 --- a/tests/entrypoints/pooling/llm/test_encode.py +++ b/tests/entrypoints/pooling/llm/test_encode.py @@ -8,6 +8,8 @@ from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory +pytestmark = pytest.mark.encoder_decoder + MODEL_NAME = "intfloat/multilingual-e5-small" PROMPTS = [ diff --git a/tests/entrypoints/pooling/openai/test_embedding.py b/tests/entrypoints/pooling/openai/test_embedding.py index e971b23e8f1a..a7e4e857d2ee 100644 --- a/tests/entrypoints/pooling/openai/test_embedding.py +++ b/tests/entrypoints/pooling/openai/test_embedding.py @@ -28,6 +28,8 @@ decode_pooling_output, ) +pytestmark = pytest.mark.encoder_decoder + MODEL_NAME = "intfloat/multilingual-e5-small" DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501 DTYPE = "bfloat16" diff --git a/tests/entrypoints/pooling/openai/test_embedding_long_text.py b/tests/entrypoints/pooling/openai/test_embedding_long_text.py index f977c81a9084..4a24201af3df 100644 --- a/tests/entrypoints/pooling/openai/test_embedding_long_text.py +++ b/tests/entrypoints/pooling/openai/test_embedding_long_text.py @@ -17,6 +17,8 @@ from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import EmbeddingResponse +pytestmark = 
pytest.mark.encoder_decoder + def _generate_random_text(word_count: int) -> str: """Generate random text with approximately the specified word count.""" diff --git a/tests/entrypoints/pooling/openai/test_rerank.py b/tests/entrypoints/pooling/openai/test_rerank.py index 1d85190c12a1..1d065c7687aa 100644 --- a/tests/entrypoints/pooling/openai/test_rerank.py +++ b/tests/entrypoints/pooling/openai/test_rerank.py @@ -9,6 +9,8 @@ from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import PoolingResponse, RerankResponse +pytestmark = pytest.mark.encoder_decoder + MODEL_NAME = "BAAI/bge-reranker-base" DTYPE = "bfloat16" diff --git a/tests/entrypoints/pooling/openai/test_vision_embedding.py b/tests/entrypoints/pooling/openai/test_vision_embedding.py index 944392d66fa5..3d0fbc9a38a9 100644 --- a/tests/entrypoints/pooling/openai/test_vision_embedding.py +++ b/tests/entrypoints/pooling/openai/test_vision_embedding.py @@ -11,6 +11,8 @@ from vllm.entrypoints.openai.protocol import EmbeddingResponse from vllm.multimodal.utils import encode_image_base64, fetch_image +pytestmark = pytest.mark.encoder_decoder + MODEL_NAME = "TIGER-Lab/VLM2Vec-Full" MAXIMUM_IMAGES = 2 diff --git a/tests/models/language/pooling/test_embedding.py b/tests/models/language/pooling/test_embedding.py index c8deffbf66db..fd4095188c28 100644 --- a/tests/models/language/pooling/test_embedding.py +++ b/tests/models/language/pooling/test_embedding.py @@ -36,14 +36,24 @@ pytest.mark.core_model, pytest.mark.cpu_model, pytest.mark.slow_test, + pytest.mark.encoder_decoder, ], ), - pytest.param("sentence-transformers/all-MiniLM-L12-v2"), - pytest.param("intfloat/multilingual-e5-small"), + pytest.param( + "sentence-transformers/all-MiniLM-L12-v2", + marks=[pytest.mark.encoder_decoder], + ), + pytest.param( + "intfloat/multilingual-e5-small", marks=[pytest.mark.encoder_decoder] + ), # [Cross-Encoder] pytest.param( "sentence-transformers/stsb-roberta-base-v2", - marks=[pytest.mark.core_model, pytest.mark.cpu_model], + marks=[ + pytest.mark.core_model, + pytest.mark.cpu_model, + pytest.mark.encoder_decoder, + ], ), ], ) From bc57795c796cda0218d8c9139e36673fd4e430ec Mon Sep 17 00:00:00 2001 From: zhewenli Date: Thu, 6 Nov 2025 13:28:50 -0800 Subject: [PATCH 2/6] update tests Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 31 ++++++++-- tests/conftest.py | 12 ++-- .../openai/test_enable_force_include_usage.py | 7 ++- tests/entrypoints/openai/test_run_batch.py | 19 +++++++ .../openai/test_transcription_validation.py | 57 +++++++++++-------- tests/entrypoints/openai/test_vision.py | 11 ---- .../pooling/correctness/test_mteb_score.py | 2 - .../entrypoints/pooling/llm/test_embedding.py | 2 - tests/entrypoints/pooling/llm/test_encode.py | 2 - .../pooling/openai/test_embedding.py | 2 - .../openai/test_embedding_long_text.py | 2 - .../entrypoints/pooling/openai/test_rerank.py | 2 - .../pooling/openai/test_vision_embedding.py | 2 - 13 files changed, 90 insertions(+), 61 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index f0493a1afd26..cccfd73ef8a4 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -144,7 +144,7 @@ steps: - label: Entrypoints Integration Test (API Server) # 100min timeout_in_minutes: 130 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking working_dir: "/vllm-workspace/tests" @@ -157,12 +157,21 @@ steps: commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - 
PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/openai/test_collective_rpc.py # PYTHONPATH is needed to import custom Worker extension - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/ + - >- + pytest -v -s entrypoints/openai + --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py + --ignore=entrypoints/openai/test_oot_registration.py + --ignore=entrypoints/openai/test_tensorizer_entrypoint.py + --ignore=entrypoints/openai/correctness/ + --ignore=entrypoints/openai/test_collective_rpc.py + --ignore=entrypoints/openai/tool_parsers/ + --ignore=entrypoints/openai/test_vision.py + --ignore=entrypoints/openai/test_optional_middleware.py - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (Pooling) timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking working_dir: "/vllm-workspace/tests" @@ -173,7 +182,21 @@ steps: - tests/entrypoints/pooling commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/pooling + # Encoder-only models not supported on ROCm (all tests use encoder/cross-encoder models) + # See .buildkite/rocm_encoder_decoder_skip.md for details + - >- + pytest -v -s entrypoints/pooling + --ignore=entrypoints/pooling/correctness/test_mteb_score.py + --ignore=entrypoints/pooling/correctness/test_mteb_embed.py + --ignore=entrypoints/pooling/llm/test_embedding.py + --ignore=entrypoints/pooling/llm/test_encode.py + --ignore=entrypoints/pooling/openai/test_embedding.py + --ignore=entrypoints/pooling/openai/test_embedding_dimensions.py + --ignore=entrypoints/pooling/openai/test_embedding_long_text.py + --ignore=entrypoints/pooling/openai/test_rerank.py + --ignore=entrypoints/pooling/openai/test_score.py + --ignore=entrypoints/pooling/openai/test_truncation.py + --ignore=entrypoints/pooling/openai/test_vision_embedding.py - label: Distributed Tests (4 GPUs) # 35min timeout_in_minutes: 50 diff --git a/tests/conftest.py b/tests/conftest.py index 4a79041ecdf1..b1bf880cc29f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1267,10 +1267,13 @@ def pytest_collection_modifyitems(config, items): "microsoft/Phi-3.5-vision-instruct", # Encoder-only models (cross-encoders, embedding models) "cross-encoder/ms-marco-MiniLM-L-6-v2", + "intfloat/e5-small", "intfloat/multilingual-e5-small", "BAAI/bge-reranker-base", + "BAAI/bge-reranker-v2-m3", "BAAI/bge-base-en-v1.5", "TIGER-Lab/VLM2Vec-Full", + "Snowflake/snowflake-arctic-embed-m-v1.5", "sentence-transformers/all-MiniLM-L12-v2", "sentence-transformers/stsb-roberta-base-v2", ] @@ -1281,10 +1284,11 @@ def pytest_collection_modifyitems(config, items): ) for item in items: if "encoder_decoder" in item.keywords: - for encoder_model in ENCODER_DECODER_MODELS: - if encoder_model in item.nodeid: - item.add_marker(skip_encoder_decoder) - break + if any( + encoder_model in item.nodeid + for encoder_model in ENCODER_DECODER_MODELS + ): + item.add_marker(skip_encoder_decoder) @pytest.fixture(scope="session") diff --git a/tests/entrypoints/openai/test_enable_force_include_usage.py b/tests/entrypoints/openai/test_enable_force_include_usage.py index 3ddf2308eb1d..27dc9a242f27 100644 --- 
a/tests/entrypoints/openai/test_enable_force_include_usage.py +++ b/tests/entrypoints/openai/test_enable_force_include_usage.py @@ -71,8 +71,8 @@ async def test_chat_with_enable_force_include_usage( assert chunk.usage is None -@pytest.fixture(scope="module") -def transcription_server_with_force_include_usage(): +@pytest.fixture(scope="module", params=["openai/whisper-large-v3-turbo"]) +def transcription_server_with_force_include_usage(request): args = [ # use half precision for speed and memory savings in CI environment "--dtype", @@ -85,7 +85,7 @@ def transcription_server_with_force_include_usage(): "0.2", ] - with RemoteOpenAIServer("openai/whisper-large-v3-turbo", args) as remote_server: + with RemoteOpenAIServer(request.param, args) as remote_server: yield remote_server @@ -100,6 +100,7 @@ async def transcription_client_with_force_include_usage( @pytest.mark.asyncio +@pytest.mark.encoder_decoder async def test_transcription_with_enable_force_include_usage( transcription_client_with_force_include_usage, winning_call ): diff --git a/tests/entrypoints/openai/test_run_batch.py b/tests/entrypoints/openai/test_run_batch.py index 2f678a0535cc..d809d35a31e1 100644 --- a/tests/entrypoints/openai/test_run_batch.py +++ b/tests/entrypoints/openai/test_run_batch.py @@ -44,6 +44,13 @@ def test_empty_file(): + from vllm.platforms import current_platform + + if current_platform.is_rocm(): + pytest.skip( + "intfloat/multilingual-e5-small is encoder-only, not supported on ROCm" + ) + with ( tempfile.NamedTemporaryFile("w") as input_file, tempfile.NamedTemporaryFile("r") as output_file, @@ -128,6 +135,13 @@ def test_completions_invalid_input(): def test_embeddings(): + from vllm.platforms import current_platform + + if current_platform.is_rocm(): + pytest.skip( + "intfloat/multilingual-e5-small is encoder-only, not supported on ROCm" + ) + with ( tempfile.NamedTemporaryFile("w") as input_file, tempfile.NamedTemporaryFile("r") as output_file, @@ -159,6 +173,11 @@ def test_embeddings(): @pytest.mark.parametrize("input_batch", [INPUT_SCORE_BATCH, INPUT_RERANK_BATCH]) def test_score(input_batch): + from vllm.platforms import current_platform + + if current_platform.is_rocm(): + pytest.skip("BAAI/bge-reranker-v2-m3 is encoder-only, not supported on ROCm") + with ( tempfile.NamedTemporaryFile("w") as input_file, tempfile.NamedTemporaryFile("r") as output_file, diff --git a/tests/entrypoints/openai/test_transcription_validation.py b/tests/entrypoints/openai/test_transcription_validation.py index dc28eae3c87f..cc5eae216a66 100644 --- a/tests/entrypoints/openai/test_transcription_validation.py +++ b/tests/entrypoints/openai/test_transcription_validation.py @@ -14,7 +14,6 @@ from ...utils import RemoteOpenAIServer -MODEL_NAME = "openai/whisper-large-v3-turbo" SERVER_ARGS = ["--enforce-eager"] MISTRAL_FORMAT_ARGS = [ "--tokenizer_mode", @@ -26,16 +25,17 @@ ] -@pytest.fixture(scope="module") -def server(): - with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as remote_server: - yield remote_server +@pytest.fixture(scope="module", params=["openai/whisper-large-v3-turbo"]) +def server(request): + with RemoteOpenAIServer(request.param, SERVER_ARGS) as remote_server: + yield remote_server, request.param @pytest_asyncio.fixture -async def client(server): +async def client_and_model(server): + server, model_name = server async with server.get_async_client() as async_client: - yield async_client + yield async_client, model_name @pytest.mark.asyncio @@ -140,17 +140,19 @@ async def test_non_asr_model(winning_call): 
@pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_bad_requests(mary_had_lamb, client): +async def test_bad_requests(mary_had_lamb, client_and_model): + client, model_name = client_and_model # invalid language with pytest.raises(openai.BadRequestError): await client.audio.transcriptions.create( - model=MODEL_NAME, file=mary_had_lamb, language="hh", temperature=0.0 + model=model_name, file=mary_had_lamb, language="hh", temperature=0.0 ) @pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_long_audio_request(mary_had_lamb, client): +async def test_long_audio_request(mary_had_lamb, client_and_model): + client, model_name = client_and_model mary_had_lamb.seek(0) audio, sr = librosa.load(mary_had_lamb) # Add small silence after each audio for repeatability in the split process @@ -161,7 +163,7 @@ async def test_long_audio_request(mary_had_lamb, client): sf.write(buffer, repeated_audio, sr, format="WAV") buffer.seek(0) transcription = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=buffer, language="en", response_format="text", @@ -177,17 +179,18 @@ async def test_long_audio_request(mary_had_lamb, client): @pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_completion_endpoints(client): +async def test_completion_endpoints(client_and_model): + client, model_name = client_and_model # text to text model res = await client.chat.completions.create( - model=MODEL_NAME, + model=model_name, messages=[{"role": "system", "content": "You are a helpful assistant."}], ) err = res.error assert err["code"] == 400 assert err["message"] == "The model does not support Chat Completions API" - res = await client.completions.create(model=MODEL_NAME, prompt="Hello") + res = await client.completions.create(model=model_name, prompt="Hello") err = res.error assert err["code"] == 400 assert err["message"] == "The model does not support Completions API" @@ -195,17 +198,18 @@ async def test_completion_endpoints(client): @pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_streaming_response(winning_call, client): +async def test_streaming_response(winning_call, client_and_model): + client, model_name = client_and_model transcription = "" res_no_stream = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=winning_call, response_format="json", language="en", temperature=0.0, ) res = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=winning_call, language="en", temperature=0.0, @@ -222,9 +226,10 @@ async def test_streaming_response(winning_call, client): @pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_stream_options(winning_call, client): +async def test_stream_options(winning_call, client_and_model): + client, model_name = client_and_model res = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=winning_call, language="en", temperature=0.0, @@ -245,13 +250,14 @@ async def test_stream_options(winning_call, client): @pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_sampling_params(mary_had_lamb, client): +async def test_sampling_params(mary_had_lamb, client_and_model): + client, model_name = client_and_model """ Compare sampling with params and greedy sampling to assert results are different when extreme sampling parameters values are picked. 
""" transcription = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=mary_had_lamb, language="en", temperature=0.8, @@ -267,7 +273,7 @@ async def test_sampling_params(mary_had_lamb, client): ) greedy_transcription = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=mary_had_lamb, language="en", temperature=0.0, @@ -279,12 +285,13 @@ async def test_sampling_params(mary_had_lamb, client): @pytest.mark.asyncio @pytest.mark.encoder_decoder -async def test_audio_prompt(mary_had_lamb, client): +async def test_audio_prompt(mary_had_lamb, client_and_model): + client, model_name = client_and_model prompt = "This is a speech, recorded in a phonograph." # Prompts should not omit the part of original prompt while transcribing. prefix = "The first words I spoke in the original phonograph" transcription = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=mary_had_lamb, language="en", response_format="text", @@ -293,7 +300,7 @@ async def test_audio_prompt(mary_had_lamb, client): out = json.loads(transcription)["text"] assert prefix in out transcription_wprompt = await client.audio.transcriptions.create( - model=MODEL_NAME, + model=model_name, file=mary_had_lamb, language="en", response_format="text", diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index 0022739a2621..2a7df08ea3b0 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -122,7 +122,6 @@ def get_hf_prompt_tokens(model_name, content, image_url): @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_single_chat_session_image( @@ -170,7 +169,6 @@ async def test_single_chat_session_image( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_error_on_invalid_image_url_type( @@ -198,7 +196,6 @@ async def test_error_on_invalid_image_url_type( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_single_chat_session_image_beamsearch( @@ -224,7 +221,6 @@ async def test_single_chat_session_image_beamsearch( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("raw_image_url", TEST_IMAGE_ASSETS) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) @@ -281,7 +277,6 @@ async def test_single_chat_session_image_base64encoded( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_idx", list(range(len(TEST_IMAGE_ASSETS)))) async def test_single_chat_session_image_base64encoded_beamsearch( @@ -312,7 +307,6 @@ async def test_single_chat_session_image_base64encoded_beamsearch( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True) async def test_chat_streaming_image( @@ -356,7 +350,6 @@ async def test_chat_streaming_image( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( 
"image_urls", @@ -398,7 +391,6 @@ async def test_multi_image_input( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", @@ -438,7 +430,6 @@ async def test_completions_with_image( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", @@ -502,7 +493,6 @@ async def test_completions_with_image_with_uuid( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_completions_with_empty_image_with_uuid_without_cache_hit( client: openai.AsyncOpenAI, @@ -532,7 +522,6 @@ async def test_completions_with_empty_image_with_uuid_without_cache_hit( @pytest.mark.asyncio -@pytest.mark.encoder_decoder @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize( "image_urls", diff --git a/tests/entrypoints/pooling/correctness/test_mteb_score.py b/tests/entrypoints/pooling/correctness/test_mteb_score.py index 50d73f080e65..1afe68b189db 100644 --- a/tests/entrypoints/pooling/correctness/test_mteb_score.py +++ b/tests/entrypoints/pooling/correctness/test_mteb_score.py @@ -28,7 +28,6 @@ def server(): yield remote_server -@pytest.mark.encoder_decoder def test_mteb_score(server): url = server.url_for("score") encoder = ScoreClientMtebEncoder(MODEL_NAME, url) @@ -43,7 +42,6 @@ def test_mteb_score(server): assert st_main_score - vllm_main_score < MTEB_RERANK_TOL -@pytest.mark.encoder_decoder def test_mteb_rerank(server): url = server.url_for("rerank") encoder = RerankClientMtebEncoder(MODEL_NAME, url) diff --git a/tests/entrypoints/pooling/llm/test_embedding.py b/tests/entrypoints/pooling/llm/test_embedding.py index 5c55f9d3b789..5455b5f91fc0 100644 --- a/tests/entrypoints/pooling/llm/test_embedding.py +++ b/tests/entrypoints/pooling/llm/test_embedding.py @@ -10,8 +10,6 @@ from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory -pytestmark = pytest.mark.encoder_decoder - MODEL_NAME = "intfloat/multilingual-e5-small" prompts = ["The chef prepared a delicious meal."] diff --git a/tests/entrypoints/pooling/llm/test_encode.py b/tests/entrypoints/pooling/llm/test_encode.py index dd98857f6025..ca85d2758fce 100644 --- a/tests/entrypoints/pooling/llm/test_encode.py +++ b/tests/entrypoints/pooling/llm/test_encode.py @@ -8,8 +8,6 @@ from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory -pytestmark = pytest.mark.encoder_decoder - MODEL_NAME = "intfloat/multilingual-e5-small" PROMPTS = [ diff --git a/tests/entrypoints/pooling/openai/test_embedding.py b/tests/entrypoints/pooling/openai/test_embedding.py index a7e4e857d2ee..e971b23e8f1a 100644 --- a/tests/entrypoints/pooling/openai/test_embedding.py +++ b/tests/entrypoints/pooling/openai/test_embedding.py @@ -28,8 +28,6 @@ decode_pooling_output, ) -pytestmark = pytest.mark.encoder_decoder - MODEL_NAME = "intfloat/multilingual-e5-small" DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501 DTYPE = "bfloat16" diff --git a/tests/entrypoints/pooling/openai/test_embedding_long_text.py b/tests/entrypoints/pooling/openai/test_embedding_long_text.py index 4a24201af3df..f977c81a9084 100644 --- a/tests/entrypoints/pooling/openai/test_embedding_long_text.py +++ b/tests/entrypoints/pooling/openai/test_embedding_long_text.py @@ -17,8 +17,6 @@ from tests.utils import 
RemoteOpenAIServer from vllm.entrypoints.openai.protocol import EmbeddingResponse -pytestmark = pytest.mark.encoder_decoder - def _generate_random_text(word_count: int) -> str: """Generate random text with approximately the specified word count.""" diff --git a/tests/entrypoints/pooling/openai/test_rerank.py b/tests/entrypoints/pooling/openai/test_rerank.py index 1d065c7687aa..1d85190c12a1 100644 --- a/tests/entrypoints/pooling/openai/test_rerank.py +++ b/tests/entrypoints/pooling/openai/test_rerank.py @@ -9,8 +9,6 @@ from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import PoolingResponse, RerankResponse -pytestmark = pytest.mark.encoder_decoder - MODEL_NAME = "BAAI/bge-reranker-base" DTYPE = "bfloat16" diff --git a/tests/entrypoints/pooling/openai/test_vision_embedding.py b/tests/entrypoints/pooling/openai/test_vision_embedding.py index 3d0fbc9a38a9..944392d66fa5 100644 --- a/tests/entrypoints/pooling/openai/test_vision_embedding.py +++ b/tests/entrypoints/pooling/openai/test_vision_embedding.py @@ -11,8 +11,6 @@ from vllm.entrypoints.openai.protocol import EmbeddingResponse from vllm.multimodal.utils import encode_image_base64, fetch_image -pytestmark = pytest.mark.encoder_decoder - MODEL_NAME = "TIGER-Lab/VLM2Vec-Full" MAXIMUM_IMAGES = 2 From adce9198c8cbc48ccc077fe00256b1b741ac9160 Mon Sep 17 00:00:00 2001 From: zhewenli Date: Thu, 6 Nov 2025 13:31:23 -0800 Subject: [PATCH 3/6] update tests Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index cccfd73ef8a4..f7826187072b 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -165,6 +165,7 @@ steps: --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/ + # Skip encoder models on ROCm: https://github.com/vllm-project/vllm/issues/27442 --ignore=entrypoints/openai/test_vision.py --ignore=entrypoints/openai/test_optional_middleware.py - pytest -v -s entrypoints/test_chat_utils.py @@ -182,8 +183,7 @@ steps: - tests/entrypoints/pooling commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - # Encoder-only models not supported on ROCm (all tests use encoder/cross-encoder models) - # See .buildkite/rocm_encoder_decoder_skip.md for details + # Skip encoder models on ROCm: https://github.com/vllm-project/vllm/issues/27442 - >- pytest -v -s entrypoints/pooling --ignore=entrypoints/pooling/correctness/test_mteb_score.py From ffc3b430a3a01c6fbfd6bd27d86e3498cb44e6a6 Mon Sep 17 00:00:00 2001 From: zhewenli Date: Thu, 6 Nov 2025 14:02:31 -0800 Subject: [PATCH 4/6] update tests Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index f7826187072b..9000e0b9acb9 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -165,9 +165,9 @@ steps: --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/ - # Skip encoder models on ROCm: https://github.com/vllm-project/vllm/issues/27442 --ignore=entrypoints/openai/test_vision.py --ignore=entrypoints/openai/test_optional_middleware.py + # ^ Skip encoder models (test_vision.py, test_optional_middleware.py) on ROCm: https://github.com/vllm-project/vllm/issues/27442 - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (Pooling) 
From bfadf5c4e97fb4eed26a9779f4b938c9ba80598c Mon Sep 17 00:00:00 2001 From: zhewenli Date: Thu, 6 Nov 2025 17:40:27 -0800 Subject: [PATCH 5/6] update tests Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 4 +--- tests/conftest.py | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 9000e0b9acb9..f098b4f74a4e 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -165,9 +165,8 @@ steps: --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/ - --ignore=entrypoints/openai/test_vision.py --ignore=entrypoints/openai/test_optional_middleware.py - # ^ Skip encoder models (test_vision.py, test_optional_middleware.py) on ROCm: https://github.com/vllm-project/vllm/issues/27442 + # Skip test_optional_middleware.py since encoder models are supported on ROCm: https://github.com/vllm-project/vllm/issues/27442 - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (Pooling) @@ -196,7 +195,6 @@ steps: --ignore=entrypoints/pooling/openai/test_rerank.py --ignore=entrypoints/pooling/openai/test_score.py --ignore=entrypoints/pooling/openai/test_truncation.py - --ignore=entrypoints/pooling/openai/test_vision_embedding.py - label: Distributed Tests (4 GPUs) # 35min timeout_in_minutes: 50 diff --git a/tests/conftest.py b/tests/conftest.py index b1bf880cc29f..637a96e62c0c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1263,8 +1263,6 @@ def pytest_collection_modifyitems(config, items): # Encoder-decoder models "openai/whisper-small", "openai/whisper-large-v3-turbo", - "mistralai/Voxtral-Mini-3B-2507", - "microsoft/Phi-3.5-vision-instruct", # Encoder-only models (cross-encoders, embedding models) "cross-encoder/ms-marco-MiniLM-L-6-v2", "intfloat/e5-small", @@ -1272,7 +1270,6 @@ def pytest_collection_modifyitems(config, items): "BAAI/bge-reranker-base", "BAAI/bge-reranker-v2-m3", "BAAI/bge-base-en-v1.5", - "TIGER-Lab/VLM2Vec-Full", "Snowflake/snowflake-arctic-embed-m-v1.5", "sentence-transformers/all-MiniLM-L12-v2", "sentence-transformers/stsb-roberta-base-v2", From 616ae433f70037b66f96eaec6034cb5ff6d2ee8d Mon Sep 17 00:00:00 2001 From: zhewenli Date: Thu, 6 Nov 2025 19:48:26 -0800 Subject: [PATCH 6/6] update tests Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index f098b4f74a4e..93d966f83be4 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -169,8 +169,8 @@ steps: # Skip test_optional_middleware.py since encoder models are supported on ROCm: https://github.com/vllm-project/vllm/issues/27442 - pytest -v -s entrypoints/test_chat_utils.py -- label: Entrypoints Integration Test (Pooling) - timeout_in_minutes: 50 +- label: Entrypoints Integration Test (Pooling) # 10min + timeout_in_minutes: 15 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking @@ -748,7 +748,7 @@ steps: - label: Language Models Tests (Standard) timeout_in_minutes: 25 - mirror_hardwares: [amdexperimental, amdproduction] + mirror_hardwares: [amdexperimental] agent_pool: mi325_1 # grade: Blocking torch_nightly: true