2 changes: 2 additions & 0 deletions tests/_test_utils/deploy_utils.py
@@ -257,6 +257,8 @@ def _deploy_trtllm_impl(self):
qwen3_models = (
"nvidia/Qwen3-Next-80B-A3B-Instruct-NVFP4",
"nvidia/Qwen3-Next-80B-A3B-Thinking-NVFP4",
"nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"/s3/nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
)
nemotron_models = (
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
5 changes: 5 additions & 0 deletions tests/_test_utils/examples/models.py
@@ -69,6 +69,11 @@ def _select_path(remote_id: str, local_id: str) -> str:
)

# Diffusers
FLUX_DEV_PATH = _select_path(
remote_id="black-forest-labs/FLUX.1-dev",
local_id="black-forest-labs/FLUX.1-dev",
)
Comment on lines +72 to +75
⚠️ Potential issue | 🟠 Major

Use a lightweight default remote for FLUX_DEV_PATH to avoid slow integration runs.

Line 73 points to the full black-forest-labs/FLUX.1-dev checkpoint when no local root is set; that can make the default tests/examples flow very slow and brittle.

⚙️ Proposed fix
+_FLUX_DEV_REMOTE_ID = os.getenv("MODELOPT_TEST_FLUX_DEV_REMOTE_ID", "hf-internal-testing/tiny-flux-pipe")
+
 FLUX_DEV_PATH = _select_path(
-    remote_id="black-forest-labs/FLUX.1-dev",
+    remote_id=_FLUX_DEV_REMOTE_ID,
     local_id="black-forest-labs/FLUX.1-dev",
 )

As per coding guidelines, tests/examples/**/*.py: "Integration tests in tests/examples/ should not take more than a few minutes to run".

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/_test_utils/examples/models.py` around lines 72-75, FLUX_DEV_PATH
currently points to the heavy remote checkpoint "black-forest-labs/FLUX.1-dev",
which makes the default test flow slow. Update the _select_path call for
FLUX_DEV_PATH to use a lightweight default remote checkpoint (or a local
placeholder) instead of the full remote id so tests/examples run quickly:
locate the FLUX_DEV_PATH definition and change the remote_id argument passed to
_select_path (and/or set a lightweight local_id) to a small, test-friendly
model identifier so CI does not fetch the full checkpoint.
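
For context, `_select_path` presumably prefers a locally staged checkpoint when a test-models root is configured and only falls back to downloading the remote Hugging Face id otherwise, which is why defaulting the remote to the full FLUX.1-dev checkpoint matters. A minimal sketch of that selection logic, assuming a hypothetical `MODELOPT_TEST_MODELS_ROOT` environment variable (the real helper in `tests/_test_utils/examples/models.py` may use a different variable name and extra validation):

```python
import os


def _select_path(remote_id: str, local_id: str) -> str:
    """Return a locally staged checkpoint if a test-models root is set, else the remote HF id.

    Sketch only -- illustrates the local-vs-remote fallback, not the actual implementation.
    """
    local_root = os.getenv("MODELOPT_TEST_MODELS_ROOT")  # hypothetical variable name
    if local_root:
        candidate = os.path.join(local_root, local_id)
        if os.path.isdir(candidate):
            return candidate
    return remote_id
```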


FLUX_SCHNELL_PATH = _select_path(
remote_id="hf-internal-testing/tiny-flux-pipe",
local_id="black-forest-labs/FLUX.1-schnell",
14 changes: 13 additions & 1 deletion tests/examples/diffusers/test_diffusers.py
@@ -17,7 +17,7 @@
from typing import NamedTuple

import pytest
from _test_utils.examples.models import FLUX_SCHNELL_PATH, SD3_PATH, SDXL_1_0_PATH
from _test_utils.examples.models import FLUX_DEV_PATH, FLUX_SCHNELL_PATH, SD3_PATH, SDXL_1_0_PATH
from _test_utils.examples.run_command import run_example_command
from _test_utils.torch.misc import minimum_sm

@@ -99,6 +99,17 @@ def inference(self, tmp_path: Path) -> None:
@pytest.mark.parametrize(
"model",
[
pytest.param(
DiffuserModel(
name="flux-dev",
path=FLUX_DEV_PATH,
dtype="BFloat16",
format_type="fp8",
quant_algo="max",
collect_method="default",
),
marks=minimum_sm(89),
),
DiffuserModel(
name="flux-schnell",
path=FLUX_SCHNELL_PATH,
@@ -136,6 +147,7 @@ def inference(self, tmp_path: Path) -> None:
),
],
ids=[
"flux_dev_bf16_fp8_max_3.0_default",
"flux_schnell_bf16_int8_smoothquant_3.0_min_mean",
"sd3_medium_fp16_int8_smoothquant_3.0_min_mean",
"sdxl_1.0_fp16_fp8_max_3.0_default",
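The new flux-dev case is gated with `marks=minimum_sm(89)`, so the FP8 path only runs on GPUs with compute capability 8.9 or newer (Ada/Hopper-class hardware that supports FP8). A rough sketch of what such a gating helper typically looks like, assuming it wraps a pytest skipif mark (the real helper is imported from `_test_utils.torch.misc` and may be implemented differently):

```python
import pytest
import torch


def minimum_sm(sm: int):
    """Return a skipif mark that gates a test on GPU compute capability (illustrative sketch)."""
    if torch.cuda.is_available():
        major, minor = torch.cuda.get_device_capability()
        current = major * 10 + minor
    else:
        current = 0
    return pytest.mark.skipif(
        current < sm,
        reason=f"requires SM >= {sm}, found SM {current}",
    )
```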
80 changes: 79 additions & 1 deletion tests/examples/llm_ptq/test_deploy.py
@@ -246,6 +246,12 @@ def test_llama(command):
tensor_parallel_size=4,
mini_sm=100,
),
*ModelDeployerList(
model_id="nvidia/Qwen3-VL-235B-A22B-Instruct-NVFP4",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
),
*ModelDeployerList(
model_id="nvidia/Qwen3-30B-A3B-NVFP4",
backend=("trtllm", "vllm", "sglang"),
@@ -295,6 +301,28 @@ def test_qwen(command):
command.run()


@pytest.mark.parametrize(
"command",
[
*ModelDeployerList(
model_id="nvidia/GLM-4.7-NVFP4",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
),
*ModelDeployerList(
model_id="nvidia/GLM-5-NVFP4",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
),
],
ids=idfn,
)
def test_glm(command):
command.run()


@pytest.mark.parametrize(
"command",
[
@@ -346,6 +374,13 @@ def test_mixtral(command):
mini_sm=89,
attn_backend="FLASHINFER",
),
*ModelDeployerList(
model_id="nvidia/Gemma-4-31B-IT-NVFP4",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=1,
mini_sm=100,
attn_backend="FLASHINFER",
),
],
ids=idfn,
)
@@ -451,6 +486,18 @@ def test_kimi(command):
mini_sm=89,
attn_backend="FLASHINFER",
),
*ModelDeployerList(
model_id="nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
model_id="nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
),
],
ids=idfn,
)
@@ -482,7 +529,6 @@ def test_llama_nemotron(command):
],
ids=idfn,
)
@pytest.mark.skip(reason="Medusa is not supported yet")
def test_medusa(command):
command.run()

@@ -497,6 +543,22 @@ def test_medusa(command):
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
base_model="nvidia/Kimi-K2-Thinking-NVFP4",
model_id="nvidia/Kimi-K2-Thinking-Eagle3",
backend=("trtllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
eagle3_one_model=False,
),
*ModelDeployerList(
base_model="nvidia/Kimi-K2.5-NVFP4",
model_id="nvidia/Kimi-K2.5-Thinking-Eagle3",
backend=("trtllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
eagle3_one_model=False,
),
*ModelDeployerList(
base_model="Qwen/Qwen3-235B-A22B",
model_id="nvidia/Qwen3-235B-A22B-Eagle3",
@@ -588,3 +650,19 @@ def test_eagle(command):
command.run()
else:
pytest.skip(f"Local model not found: {local_path}")


@pytest.mark.parametrize(
"command",
[
*ModelDeployerList(
model_id="nvidia/MiniMax-M2.5-NVFP4",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=8,
mini_sm=100,
),
],
ids=idfn,
)
def test_minimax(command):
command.run()
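
All of the new entries (GLM, Gemma, Nemotron Super, Kimi Eagle3, MiniMax) follow the same pattern as the existing cases: a `*ModelDeployerList(...)` declaration is unpacked into the parametrize list, presumably fanning out into one deploy command per entry in the `backend` tuple, each exposing a `run()` method. A minimal sketch of that fan-out pattern with invented internals (the real helper in this test suite carries more options such as `base_model`, `attn_backend`, and `eagle3_one_model`):

```python
from dataclasses import dataclass


@dataclass
class DeployCommand:
    """One concrete deployment a test will run (illustrative only)."""

    model_id: str
    backend: str
    tensor_parallel_size: int = 1
    mini_sm: int = 0

    def run(self) -> None:
        # The real command launches the chosen serving backend and queries the deployed model;
        # this sketch only shows the shape of the object yielded per backend.
        print(f"deploying {self.model_id} on {self.backend} (tp={self.tensor_parallel_size})")


class ModelDeployerList:
    """Iterable that expands one model declaration into a command per backend (sketch)."""

    def __init__(self, model_id: str, backend: tuple[str, ...], **options):
        self._commands = [DeployCommand(model_id, b, **options) for b in backend]

    def __iter__(self):
        return iter(self._commands)


# Unpacking mirrors how the test file builds its parametrize lists:
commands = [
    *ModelDeployerList(
        model_id="nvidia/MiniMax-M2.5-NVFP4",
        backend=("trtllm", "vllm", "sglang"),
        tensor_parallel_size=8,
        mini_sm=100,
    )
]
```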