
Commit ced4b6c

add test for Nemotron 3 Nano
Signed-off-by: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com>
1 parent 73495eb commit ced4b6c

File tree

2 files changed: 64 additions, 3 deletions

tests/_test_utils/deploy_utils.py

Lines changed: 42 additions & 1 deletion
@@ -100,7 +100,40 @@ def _deploy_trtllm(self):
         spec_config = None
         llm = None
         kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
-        if "eagle" in self.model_id.lower():
+
+        if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+            llm = LLM(
+                model=self.model_id,
+                tensor_parallel_size=self.tensor_parallel_size,
+                enable_attention_dp=False,
+                attn_backend=self.attn_backend,
+                trust_remote_code=True,
+                max_batch_size=8,
+                kv_cache_config=KvCacheConfig(
+                    enable_block_reuse=False,
+                    mamba_ssm_cache_dtype="float32",
+                ),
+            )
+        elif self.model_id == "nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16":
+            spec_config = EagleDecodingConfig(
+                max_draft_len=3,
+                speculative_model_dir=self.model_id,
+                eagle3_one_model=self.eagle3_one_model,
+            )
+            llm = LLM(
+                model=self.model_id,
+                tensor_parallel_size=self.tensor_parallel_size,
+                enable_attention_dp=False,
+                attn_backend=self.attn_backend,
+                trust_remote_code=True,
+                max_batch_size=8,
+                speculative_config=spec_config,
+                kv_cache_config=KvCacheConfig(
+                    enable_block_reuse=False,
+                    mamba_ssm_cache_dtype="float32",
+                ),
+            )
+        elif "eagle" in self.model_id.lower():
             spec_config = EagleDecodingConfig(
                 max_draft_len=3,
                 speculative_model_dir=self.model_id,
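Note: the new first branch gives the Nemotron 3 Nano FP8 checkpoint a dedicated TRT-LLM configuration: KV block reuse is turned off and the Mamba SSM state cache is kept in float32, which the mamba_ssm_cache_dtype option suggests is needed because the model mixes Mamba and attention layers. Below is a minimal standalone sketch of the same setup, assuming TensorRT-LLM's Python LLM API behaves as the diff uses it; the attn_backend argument, which the test takes from its fixture, is omitted here.

    # Hedged sketch only: assumes TensorRT-LLM's LLM API accepts the same
    # keyword arguments used in the diff; the prompt is illustrative.
    from tensorrt_llm import LLM
    from tensorrt_llm.llmapi import KvCacheConfig

    llm = LLM(
        model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
        tensor_parallel_size=1,
        trust_remote_code=True,
        max_batch_size=8,
        kv_cache_config=KvCacheConfig(
            enable_block_reuse=False,         # no block reuse for the hybrid cache
            mamba_ssm_cache_dtype="float32",  # keep SSM state in full precision
        ),
    )
    for out in llm.generate(["Hello, my name is"]):
        print(out.outputs[0].text)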
@@ -197,6 +230,14 @@ def _deploy_sglang(self):
                 mem_fraction_static=0.7,
                 context_length=1024,
             )
+        elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+            llm = sgl.Engine(
+                model_path=self.model_id,
+                quantization=quantization_method,
+                tp_size=self.tensor_parallel_size,
+                trust_remote_code=True,
+                attention_backend="flashinfer",
+            )
         else:
             llm = sgl.Engine(
                 model_path=self.model_id,
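The SGLang branch mirrors the TRT-LLM one: the FP8 checkpoint gets its own sgl.Engine pinned to the FlashInfer attention backend. A hedged standalone sketch follows, assuming SGLang's offline Engine API; "modelopt" is an assumed stand-in for the quantization_method the test resolves elsewhere.

    # Hedged sketch: assumes SGLang's offline Engine API; "modelopt" is an
    # assumed stand-in for the quantization_method from the test utilities.
    import sglang as sgl

    llm = sgl.Engine(
        model_path="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
        quantization="modelopt",         # assumption, see note above
        tp_size=1,
        trust_remote_code=True,
        attention_backend="flashinfer",  # matches the branch in the diff
    )
    print(llm.generate("Hello, my name is", {"max_new_tokens": 32}))
    llm.shutdown()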

tests/examples/llm_ptq/test_deploy.py

Lines changed: 22 additions & 2 deletions
@@ -386,6 +386,13 @@ def test_kimi(command):
            tensor_parallel_size=8,
            mini_sm=89,
        ),
+        *ModelDeployerList(
+            model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            backend=("trtllm", "vllm", "sglang"),
+            tensor_parallel_size=1,
+            mini_sm=89,
+            attn_backend="FLASHINFER",
+        ),
     ],
     ids=idfn,
 )
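ModelDeployerList appears to fan a single model spec out into one parametrized case per backend, so this entry adds three deployments (trtllm, vllm, sglang) of the FP8 checkpoint at tensor-parallel size 1, gated on SM 8.9+ GPUs. A hypothetical sketch of that expansion; the real helper lives in the repo's test utilities and its fields and signature may differ.

    # Hypothetical sketch of a ModelDeployerList-style expansion; not the
    # repo's actual implementation.
    from dataclasses import dataclass


    @dataclass
    class ModelDeployer:  # hypothetical stand-in for the repo's deployer type
        model_id: str
        backend: str
        tensor_parallel_size: int = 1
        mini_sm: int = 0
        attn_backend: str | None = None


    def model_deployer_list(backend, **kwargs):
        # One deployer per backend, sharing all other fields.
        return [ModelDeployer(backend=b, **kwargs) for b in backend]


    cases = model_deployer_list(
        backend=("trtllm", "vllm", "sglang"),
        model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
        tensor_parallel_size=1,
        mini_sm=89,
        attn_backend="FLASHINFER",
    )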
@@ -486,7 +493,8 @@ def test_medusa(command):
        *ModelDeployerList(
            base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
            model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
-            backend=("trtllm", "sglang"),
+            backend=("trtllm", "vllm", "sglang"),
+            eagle3_one_model=False,
            tensor_parallel_size=8,
            mini_sm=89,
        ),
@@ -501,4 +509,16 @@ def test_medusa(command):
     ids=idfn,
 )
 def test_eagle(command):
-    command.run()
+    """Skip the test if MODELOPT_LOCAL_MODEL_ROOT is set but the model doesn't
+    exist locally; speculative models should be loaded from a local path."""
+    local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT")
+    if not local_root:
+        return
+
+    local_path = os.path.join(local_root, command.model_id)
+    if os.path.isdir(local_path):
+        # Update model_id to use the local path
+        command.model_id = local_path
+        command.run()
+    else:
+        pytest.skip(f"Local model not found: {local_path}")
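The rewritten test_eagle resolves EAGLE3 speculative checkpoints from a local model root before running: with MODELOPT_LOCAL_MODEL_ROOT set, the checkpoint must exist under that root or the test is skipped; without it, the test is a no-op. The same resolution as a standalone helper, a hedged sketch only (resolve_local_model is a hypothetical name, not part of the repo):

    # Hedged sketch: resolve_local_model is a hypothetical helper mirroring
    # the test's logic; it is not part of the repository.
    import os

    import pytest


    def resolve_local_model(model_id: str) -> str | None:
        """Return the local checkpoint path for model_id, or None when no
        local root is configured (the caller then skips running)."""
        local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT")
        if not local_root:
            return None
        local_path = os.path.join(local_root, model_id)
        if not os.path.isdir(local_path):
            pytest.skip(f"Local model not found: {local_path}")
        return local_path

Running with, e.g., MODELOPT_LOCAL_MODEL_ROOT=/opt/models would then load /opt/models/nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 from disk.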
