diff --git a/tests/_test_utils/deploy_utils.py b/tests/_test_utils/deploy_utils.py
index 7a1897ccb..4a0bfffbc 100644
--- a/tests/_test_utils/deploy_utils.py
+++ b/tests/_test_utils/deploy_utils.py
@@ -100,7 +100,43 @@ def _deploy_trtllm(self):
         spec_config = None
         llm = None
         kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
-        if "eagle" in self.model_id.lower():
+
+        if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+            llm = LLM(
+                model=self.model_id,
+                tensor_parallel_size=self.tensor_parallel_size,
+                enable_attention_dp=False,
+                attn_backend=self.attn_backend,
+                trust_remote_code=True,
+                max_batch_size=8,
+                kv_cache_config=KvCacheConfig(
+                    enable_block_reuse=False,
+                    mamba_ssm_cache_dtype="float32",
+                ),
+            )
+        elif self.model_id.endswith("nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"):
+            # Suffix match because test_eagle may rewrite model_id to a local path.
+            spec_config = EagleDecodingConfig(
+                max_draft_len=3,
+                speculative_model_dir=self.model_id,
+                eagle3_one_model=self.eagle3_one_model,
+            )
+            # NOTE(review): `model` below is the draft checkpoint, identical to
+            # speculative_model_dir; confirm whether the base model was intended.
+            llm = LLM(
+                model=self.model_id,
+                tensor_parallel_size=self.tensor_parallel_size,
+                enable_attention_dp=False,
+                attn_backend=self.attn_backend,
+                trust_remote_code=True,
+                max_batch_size=8,
+                speculative_config=spec_config,
+                kv_cache_config=KvCacheConfig(
+                    enable_block_reuse=False,
+                    mamba_ssm_cache_dtype="float32",
+                ),
+            )
+        elif "eagle" in self.model_id.lower():
             spec_config = EagleDecodingConfig(
                 max_draft_len=3,
                 speculative_model_dir=self.model_id,
@@ -146,7 +182,7 @@ def _deploy_vllm(self):
             pytest.skip("vllm package not available")
 
         quantization_method = "modelopt"
-        if "FP4" in self.model_id:
+        if "fp4" in self.model_id.lower():
             quantization_method = "modelopt_fp4"
         llm = LLM(
             model=self.model_id,
@@ -182,7 +218,7 @@ def _deploy_sglang(self):
         except ImportError:
             pytest.skip("sglang package not available")
         quantization_method = "modelopt"
-        if "FP4" in self.model_id:
+        if "fp4" in self.model_id.lower():
             quantization_method = "modelopt_fp4"
         if "eagle" in self.model_id.lower():
             llm = sgl.Engine(
@@ -197,6 +233,14 @@ def _deploy_sglang(self):
                 mem_fraction_static=0.7,
                 context_length=1024,
             )
+        elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+            llm = sgl.Engine(
+                model_path=self.model_id,
+                quantization=quantization_method,
+                tp_size=self.tensor_parallel_size,
+                trust_remote_code=True,
+                attention_backend="flashinfer",
+            )
         else:
             llm = sgl.Engine(
                 model_path=self.model_id,
diff --git a/tests/examples/cnn_qat/test_resnet50.py b/tests/examples/cnn_qat/test_resnet50.py
index 21b2e4860..dba47587e 100644
--- a/tests/examples/cnn_qat/test_resnet50.py
+++ b/tests/examples/cnn_qat/test_resnet50.py
@@ -19,14 +19,19 @@
 from _test_utils.examples.run_command import run_example_command
 from _test_utils.torch.misc import minimum_gpu
 
-imagenet_path = os.getenv("IMAGENET_PATH")
-skip_no_imagenet = pytest.mark.skipif(
-    not imagenet_path or not os.path.isdir(imagenet_path),
-    reason="IMAGENET_PATH environment variable is not set or does not point to a valid directory",
-)
 
+@pytest.fixture
+def imagenet_path():
+    """Fixture to get IMAGENET_PATH from environment and skip if not valid."""
+    path = os.getenv("IMAGENET_PATH")
+    if not path or not os.path.isdir(path):
+        pytest.skip(
+            "IMAGENET_PATH environment variable is not set or does not point to a valid directory"
+        )
+    return path
 
-def _build_common_command():
+
+def _build_common_command(imagenet_path):
     """Build common command arguments for CNN QAT training."""
     train_data_path = os.path.join(imagenet_path, "train")
     val_data_path = os.path.join(imagenet_path, "val")
@@ -58,21 +63,19 @@ def _run_qat_command(base_cmd, common_args, output_dir, example_dir="cnn_qat"):
     run_example_command(full_command, example_dir)
 
 
-@skip_no_imagenet
 @minimum_gpu(1)
-def test_cnn_qat_single_gpu(tmp_path):
+def test_cnn_qat_single_gpu(tmp_path, imagenet_path):
     """Test CNN QAT on single GPU."""
-    common_args = _build_common_command()
+    common_args = _build_common_command(imagenet_path)
     base_command = ["python", "torchvision_qat.py", "--gpu", "0"]
 
     _run_qat_command(base_command, common_args, tmp_path)
 
 
-@skip_no_imagenet
 @minimum_gpu(2)
-def test_cnn_qat_multi_gpu(tmp_path):
+def test_cnn_qat_multi_gpu(tmp_path, imagenet_path):
     """Test CNN QAT on multiple GPUs."""
-    common_args = _build_common_command()
+    common_args = _build_common_command(imagenet_path)
     base_command = ["torchrun", "--nproc_per_node=2", "torchvision_qat.py"]
 
     _run_qat_command(base_command, common_args, tmp_path)
diff --git a/tests/examples/llm_ptq/test_deploy.py b/tests/examples/llm_ptq/test_deploy.py
index 3d3229e01..868304f48 100644
--- a/tests/examples/llm_ptq/test_deploy.py
+++ b/tests/examples/llm_ptq/test_deploy.py
@@ -386,6 +386,13 @@ def test_kimi(command):
             tensor_parallel_size=8,
             mini_sm=89,
         ),
+        *ModelDeployerList(
+            model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            backend=("trtllm", "vllm", "sglang"),
+            tensor_parallel_size=1,
+            mini_sm=89,
+            attn_backend="FLASHINFER",
+        ),
     ],
     ids=idfn,
 )
@@ -464,18 +471,33 @@ def test_medusa(command):
         ),
         *ModelDeployerList(
             base_model="openai/gpt-oss-120b",
-            model_id="nvidia/gpt-oss-120b-Eagle3",
+            model_id="nvidia/gpt-oss-120b-Eagle3-long-context",
+            backend=("trtllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=89,
+        ),
+        *ModelDeployerList(
+            base_model="openai/gpt-oss-120b",
+            model_id="nvidia/gpt-oss-120b-Eagle3-short-context",
             backend=("trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=89,
         ),
         *ModelDeployerList(
             base_model="openai/gpt-oss-120b",
-            model_id="nvidia/gpt-oss-120b-Eagle3-v2",
+            model_id="nvidia/gpt-oss-120b-Eagle3-throughput",
             backend=("trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=89,
         ),
+        *ModelDeployerList(
+            base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+            model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+            backend=("trtllm", "vllm", "sglang"),
+            eagle3_one_model=False,
+            tensor_parallel_size=8,
+            mini_sm=89,
+        ),
         *ModelDeployerList(
             base_model="nvidia/Llama-3.3-70B-Instruct-FP8",
             model_id="nvidia/Llama-3.3-70B-Instruct-Eagle3",
@@ -487,4 +509,19 @@ def test_medusa(command):
     ids=idfn,
 )
 def test_eagle(command):
-    command.run()
+    """Run the EAGLE deployment from a local copy of the speculative model.
+
+    Speculative models should be loaded from a local path, so the test is skipped
+    when MODELOPT_LOCAL_MODEL_ROOT is unset or the model is not found under it.
+    """
+    local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT")
+    if not local_root:
+        pytest.skip("MODELOPT_LOCAL_MODEL_ROOT is not set")
+
+    local_path = os.path.join(local_root, command.model_id)
+    if os.path.isdir(local_path):
+        # Update model_id to use local path
+        command.model_id = local_path
+        command.run()
+    else:
+        pytest.skip(f"Local model not found: {local_path}")