Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 44 additions & 3 deletions tests/_test_utils/deploy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,40 @@ def _deploy_trtllm(self):
spec_config = None
llm = None
kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
if "eagle" in self.model_id.lower():

if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
llm = LLM(
model=self.model_id,
tensor_parallel_size=self.tensor_parallel_size,
enable_attention_dp=False,
attn_backend=self.attn_backend,
trust_remote_code=True,
max_batch_size=8,
kv_cache_config=KvCacheConfig(
enable_block_reuse=False,
mamba_ssm_cache_dtype="float32",
),
)
elif self.model_id == "nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16":
spec_config = EagleDecodingConfig(
max_draft_len=3,
speculative_model_dir=self.model_id,
eagle3_one_model=self.eagle3_one_model,
)
llm = LLM(
model=self.model_id,
tensor_parallel_size=self.tensor_parallel_size,
enable_attention_dp=False,
attn_backend=self.attn_backend,
trust_remote_code=True,
max_batch_size=8,
speculative_config=spec_config,
kv_cache_config=KvCacheConfig(
enable_block_reuse=False,
mamba_ssm_cache_dtype="float32",
),
)
elif "eagle" in self.model_id.lower():
spec_config = EagleDecodingConfig(
max_draft_len=3,
speculative_model_dir=self.model_id,
Expand Down Expand Up @@ -146,7 +179,7 @@ def _deploy_vllm(self):
pytest.skip("vllm package not available")

quantization_method = "modelopt"
if "FP4" in self.model_id:
if "FP4" in self.model_id.lower():
quantization_method = "modelopt_fp4"
llm = LLM(
model=self.model_id,
Expand Down Expand Up @@ -182,7 +215,7 @@ def _deploy_sglang(self):
except ImportError:
pytest.skip("sglang package not available")
quantization_method = "modelopt"
if "FP4" in self.model_id:
if "FP4" in self.model_id.lower():
quantization_method = "modelopt_fp4"
if "eagle" in self.model_id.lower():
llm = sgl.Engine(
Expand All @@ -197,6 +230,14 @@ def _deploy_sglang(self):
mem_fraction_static=0.7,
context_length=1024,
)
elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
llm = sgl.Engine(
model_path=self.model_id,
quantization=quantization_method,
tp_size=self.tensor_parallel_size,
trust_remote_code=True,
attention_backend="flashinfer",
)
else:
llm = sgl.Engine(
model_path=self.model_id,
Expand Down
27 changes: 15 additions & 12 deletions tests/examples/cnn_qat/test_resnet50.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,19 @@
from _test_utils.examples.run_command import run_example_command
from _test_utils.torch.misc import minimum_gpu

imagenet_path = os.getenv("IMAGENET_PATH")
skip_no_imagenet = pytest.mark.skipif(
not imagenet_path or not os.path.isdir(imagenet_path),
reason="IMAGENET_PATH environment variable is not set or does not point to a valid directory",
)

@pytest.fixture
def imagenet_path():
    """Provide the ImageNet root directory named by the IMAGENET_PATH environment variable.

    The requesting test is skipped when the variable is unset, empty, or does
    not point to an existing directory.
    """
    root = os.getenv("IMAGENET_PATH")
    if root and os.path.isdir(root):
        return root
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip(
        "IMAGENET_PATH environment variable is not set or does not point to a valid directory"
    )

def _build_common_command():

def _build_common_command(imagenet_path):
"""Build common command arguments for CNN QAT training."""
train_data_path = os.path.join(imagenet_path, "train")
val_data_path = os.path.join(imagenet_path, "val")
Expand Down Expand Up @@ -58,21 +63,19 @@ def _run_qat_command(base_cmd, common_args, output_dir, example_dir="cnn_qat"):
run_example_command(full_command, example_dir)


@minimum_gpu(1)
def test_cnn_qat_single_gpu(tmp_path, imagenet_path):
    """Run the CNN QAT example on a single GPU, writing output under ``tmp_path``."""
    qat_args = _build_common_command(imagenet_path)
    # Plain python launch with the script's --gpu option set to 0.
    launcher = ["python", "torchvision_qat.py", "--gpu", "0"]
    _run_qat_command(launcher, qat_args, tmp_path)


@minimum_gpu(2)
def test_cnn_qat_multi_gpu(tmp_path, imagenet_path):
    """Run the CNN QAT example across two GPUs, writing output under ``tmp_path``."""
    qat_args = _build_common_command(imagenet_path)
    # torchrun launch with two processes per node.
    launcher = ["torchrun", "--nproc_per_node=2", "torchvision_qat.py"]
    _run_qat_command(launcher, qat_args, tmp_path)
40 changes: 37 additions & 3 deletions tests/examples/llm_ptq/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,13 @@ def test_kimi(command):
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
backend=("trtllm", "vllm", "sglang"),
tensor_parallel_size=1,
mini_sm=89,
attn_backend="FLASHINFER",
),
],
ids=idfn,
)
Expand Down Expand Up @@ -464,18 +471,33 @@ def test_medusa(command):
),
*ModelDeployerList(
base_model="openai/gpt-oss-120b",
model_id="nvidia/gpt-oss-120b-Eagle3",
model_id="nvidia/gpt-oss-120b-Eagle3-long-context",
backend=("trtllm", "sglang"),
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
base_model="openai/gpt-oss-120b",
model_id="nvidia/gpt-oss-120b-Eagle3-short-context",
backend=("trtllm", "sglang"),
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
base_model="openai/gpt-oss-120b",
model_id="nvidia/gpt-oss-120b-Eagle3-v2",
model_id="nvidia/gpt-oss-120b-Eagle3-throughput",
backend=("trtllm", "sglang"),
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
backend=("trtllm", "vllm", "sglang"),
eagle3_one_model=False,
tensor_parallel_size=8,
mini_sm=89,
),
*ModelDeployerList(
base_model="nvidia/Llama-3.3-70B-Instruct-FP8",
model_id="nvidia/Llama-3.3-70B-Instruct-Eagle3",
Expand All @@ -487,4 +509,16 @@ def test_medusa(command):
ids=idfn,
)
def test_eagle(command):
    """Run an EAGLE speculative-decoding deployment from a locally stored model.

    Speculative models should be loaded by local path, so the model is looked
    up at ``<MODELOPT_LOCAL_MODEL_ROOT>/<command.model_id>``.

    The test is skipped (and reported as skipped rather than passed) when:
      * the ``MODELOPT_LOCAL_MODEL_ROOT`` environment variable is unset or
        empty, or
      * the expected local model directory does not exist.

    Otherwise ``command.model_id`` is rewritten to the local path and the
    deployment is run.
    """
    local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT")
    if not local_root:
        # A bare ``return`` here would report the test as passed although
        # nothing was deployed; skip explicitly so the report stays accurate.
        pytest.skip("MODELOPT_LOCAL_MODEL_ROOT environment variable is not set")

    local_path = os.path.join(local_root, command.model_id)
    if not os.path.isdir(local_path):
        pytest.skip(f"Local model not found: {local_path}")

    # Point the deployer at the local directory instead of the original model ID.
    command.model_id = local_path
    command.run()