NVIDIA · noeyy-mino · Dec 23, 2025 · Dec 25, 2025
diff --git a/tests/_test_utils/deploy_utils.py b/tests/_test_utils/deploy_utils.py
@@ -100,7 +100,40 @@ def _deploy_trtllm(self):
         spec_config = None
         llm = None
         kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
-        if "eagle" in self.model_id.lower():
+
+        if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+            llm = LLM(
+                model=self.model_id,
+                tensor_parallel_size=self.tensor_parallel_size,
+                enable_attention_dp=False,
+                attn_backend=self.attn_backend,
+                trust_remote_code=True,
+                max_batch_size=8,
+                kv_cache_config=KvCacheConfig(
+                    enable_block_reuse=False,
+                    mamba_ssm_cache_dtype="float32",
+                ),
+            )
+        elif self.model_id == "nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16":
+            spec_config = EagleDecodingConfig(  # NOTE(review): exact-id branch; hit for local paths?
+                max_draft_len=3,
+                speculative_model_dir=self.model_id,
+                eagle3_one_model=self.eagle3_one_model,
+            )
+            llm = LLM(
+                model=self.model_id,  # NOTE(review): equals speculative_model_dir; base model meant?
+                tensor_parallel_size=self.tensor_parallel_size,
+                enable_attention_dp=False,
+                attn_backend=self.attn_backend,
+                trust_remote_code=True,
+                max_batch_size=8,
+                speculative_config=spec_config,
+                kv_cache_config=KvCacheConfig(
+                    enable_block_reuse=False,
+                    mamba_ssm_cache_dtype="float32",
+                ),
+            )
+        elif "eagle" in self.model_id.lower():
             spec_config = EagleDecodingConfig(
                 max_draft_len=3,
                 speculative_model_dir=self.model_id,
@@ -146,7 +179,7 @@ def _deploy_vllm(self):
             pytest.skip("vllm package not available")
 
         quantization_method = "modelopt"
-        if "FP4" in self.model_id:
+        if "fp4" in self.model_id.lower():  # lowercase needle: the haystack is lowercased
             quantization_method = "modelopt_fp4"
         llm = LLM(
             model=self.model_id,
@@ -182,7 +215,7 @@ def _deploy_sglang(self):
         except ImportError:
             pytest.skip("sglang package not available")
         quantization_method = "modelopt"
-        if "FP4" in self.model_id:
+        if "fp4" in self.model_id.lower():  # lowercase needle: the haystack is lowercased
             quantization_method = "modelopt_fp4"
         if "eagle" in self.model_id.lower():
             llm = sgl.Engine(
@@ -197,6 +230,14 @@ def _deploy_sglang(self):
                 mem_fraction_static=0.7,
                 context_length=1024,
             )
+        elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+            llm = sgl.Engine(
+                model_path=self.model_id,
+                quantization=quantization_method,
+                tp_size=self.tensor_parallel_size,
+                trust_remote_code=True,
+                attention_backend="flashinfer",
+            )
         else:
             llm = sgl.Engine(
                 model_path=self.model_id,

diff --git a/tests/examples/cnn_qat/test_resnet50.py b/tests/examples/cnn_qat/test_resnet50.py
@@ -19,14 +19,19 @@
 from _test_utils.examples.run_command import run_example_command
 from _test_utils.torch.misc import minimum_gpu
 
-imagenet_path = os.getenv("IMAGENET_PATH")
-skip_no_imagenet = pytest.mark.skipif(
-    not imagenet_path or not os.path.isdir(imagenet_path),
-    reason="IMAGENET_PATH environment variable is not set or does not point to a valid directory",
-)
 
+@pytest.fixture
+def imagenet_path():
+    """Fixture to get IMAGENET_PATH from environment and skip if not valid."""
+    path = os.getenv("IMAGENET_PATH")
+    if not path or not os.path.isdir(path):
+        pytest.skip(
+            "IMAGENET_PATH environment variable is not set or does not point to a valid directory"
+        )
+    return path
 
-def _build_common_command():
+
+def _build_common_command(imagenet_path):
     """Build common command arguments for CNN QAT training."""
     train_data_path = os.path.join(imagenet_path, "train")
     val_data_path = os.path.join(imagenet_path, "val")
@@ -58,21 +63,19 @@ def _run_qat_command(base_cmd, common_args, output_dir, example_dir="cnn_qat"):
     run_example_command(full_command, example_dir)
 
 
-@skip_no_imagenet
 @minimum_gpu(1)
-def test_cnn_qat_single_gpu(tmp_path):
+def test_cnn_qat_single_gpu(tmp_path, imagenet_path):
     """Test CNN QAT on single GPU."""
-    common_args = _build_common_command()
+    common_args = _build_common_command(imagenet_path)
     base_command = ["python", "torchvision_qat.py", "--gpu", "0"]
 
     _run_qat_command(base_command, common_args, tmp_path)
 
 
-@skip_no_imagenet
 @minimum_gpu(2)
-def test_cnn_qat_multi_gpu(tmp_path):
+def test_cnn_qat_multi_gpu(tmp_path, imagenet_path):
     """Test CNN QAT on multiple GPUs."""
-    common_args = _build_common_command()
+    common_args = _build_common_command(imagenet_path)
     base_command = ["torchrun", "--nproc_per_node=2", "torchvision_qat.py"]
 
     _run_qat_command(base_command, common_args, tmp_path)
diff --git a/tests/examples/llm_ptq/test_deploy.py b/tests/examples/llm_ptq/test_deploy.py
@@ -386,6 +386,13 @@ def test_kimi(command):
             tensor_parallel_size=8,
             mini_sm=89,
         ),
+        *ModelDeployerList(
+            model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            backend=("trtllm", "vllm", "sglang"),
+            tensor_parallel_size=1,
+            mini_sm=89,
+            attn_backend="FLASHINFER",
+        ),
     ],
     ids=idfn,
 )
@@ -464,18 +471,33 @@ def test_medusa(command):
         ),
         *ModelDeployerList(
             base_model="openai/gpt-oss-120b",
-            model_id="nvidia/gpt-oss-120b-Eagle3",
+            model_id="nvidia/gpt-oss-120b-Eagle3-long-context",
+            backend=("trtllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=89,
+        ),
+        *ModelDeployerList(
+            base_model="openai/gpt-oss-120b",
+            model_id="nvidia/gpt-oss-120b-Eagle3-short-context",
             backend=("trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=89,
         ),
         *ModelDeployerList(
             base_model="openai/gpt-oss-120b",
-            model_id="nvidia/gpt-oss-120b-Eagle3-v2",
+            model_id="nvidia/gpt-oss-120b-Eagle3-throughput",
             backend=("trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=89,
         ),
+        *ModelDeployerList(
+            base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+            model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+            backend=("trtllm", "vllm", "sglang"),
+            eagle3_one_model=False,
+            tensor_parallel_size=8,
+            mini_sm=89,
+        ),
         *ModelDeployerList(
             base_model="nvidia/Llama-3.3-70B-Instruct-FP8",
             model_id="nvidia/Llama-3.3-70B-Instruct-Eagle3",
@@ -487,4 +509,16 @@ def test_medusa(command):
     ids=idfn,
 )
 def test_eagle(command):
-    command.run()
+    """Deploy the speculative model from MODELOPT_LOCAL_MODEL_ROOT, or skip.
+    Speculative models should be loaded by local path."""
+    local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT")
+    if not local_root:
+        # Skip, not return: a bare return reports an untested model as passed.
+        pytest.skip("MODELOPT_LOCAL_MODEL_ROOT is not set")
+
+    local_path = os.path.join(local_root, command.model_id)
+    if not os.path.isdir(local_path):
+        pytest.skip(f"Local model not found: {local_path}")
+    # Point the deployer at the local copy before running it.
+    command.model_id = local_path
+    command.run()