From f636e2ca3d09d0ac3712a2bb4aed21f549eb7f4a Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 29 Oct 2025 17:20:59 -0400 Subject: [PATCH 01/41] Allow using vllm image Signed-off-by: Dan Huang --- tests/e2e/vLLM/e2e-smoke.list | 6 +++ tests/e2e/vLLM/run_tests.sh | 11 ++++- tests/e2e/vLLM/test_vllm.py | 81 ++++++++++++++++++++++++----------- 3 files changed, 72 insertions(+), 26 deletions(-) create mode 100644 tests/e2e/vLLM/e2e-smoke.list diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/e2e-smoke.list new file mode 100644 index 0000000000..e7f42d4a1e --- /dev/null +++ b/tests/e2e/vLLM/e2e-smoke.list @@ -0,0 +1,6 @@ +fp8_dynamic_per_token.yaml +kv_cache_gptq_tinyllama.yaml +sparse2of4_fp8_dynamic.yaml +w4a16_grouped_quant_asym_awq.yaml +w4a16_actorder_weight.yaml +int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests.sh index 9a2fcec9cd..988319da55 100644 --- a/tests/e2e/vLLM/run_tests.sh +++ b/tests/e2e/vLLM/run_tests.sh @@ -16,8 +16,17 @@ while getopts "c:t:" OPT; do esac done +script_path=$(dirname "${BASH_SOURCE[0]}") +if [ -d "$CONFIG" ]; then + echo "Config is provided as a folder: $CONFIG" + CONFIGS=`ls "$CONFIG"` +elif [ -f "$CONFIG" ]; then + echo "Config is provided as a file: $CONFIG" + CONFIGS=`cat "$CONFIG"` +fi + # Parse list of configs. -for MODEL_CONFIG in "$CONFIG"/* +for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") do LOCAL_SUCCESS=0 diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9c099a5aea..507b588ed6 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -21,8 +21,9 @@ "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm python environment +# vllm environment: image url, same (default), or the path of vllm virtualenv VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") +RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" EXPECTED_SAVED_FILES = [ @@ -31,7 +32,11 @@ "recipe.yaml", "tokenizer.json", ] - +IS_VLLM_IMAGE = false +# when using vllm image, needs to save the generated model and vllm command +if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exist()): + IS_VLLM_IMAGE = true + assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" 
# Will run each test case in its own process through run_tests.sh # emulating vLLM CI testing @@ -76,18 +81,29 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - # vllm python env - if same, use the current python env, otherwise use - # the python passed in VLLM_PYTHON_ENV - if VLLM_PYTHON_ENV.lower() != "same": - self.vllm_env = VLLM_PYTHON_ENV - else: + self.is_vllm_image = IS_VLLM_IMAGE + if VLLM_PYTHON_ENV.lower() == "same": self.vllm_env = sys.executable + else: + self.vllm_env = VLLM_PYTHON_ENV + + if RUN_SAVE_DIR != "none": + assert sd_path.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + self.run_save_dir = RUN_SAVE_DIR if not self.save_dir: - self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" + if RUN_SAVE_DIR != "none": + self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") + else: + self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" logger.info("========== RUNNING ==============") - logger.info(self.save_dir) + logger.info(f"model save dir: {self.save_dir}") + + # command file to run vllm if using vllm image + if self.is_vllm_image: + self.vllm_cmd_file = os.path.join(RUN_SAVE_DIR, "vllm.cmd") + logger.info(f"vllm cmd file save dir: {self.vllm_cmd_file}") self.prompts = [ "The capital of France is", @@ -100,8 +116,9 @@ def test_vllm(self, test_data_file: str): # Run vLLM with saved model self.set_up(test_data_file) - if not self.save_dir: - self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" + # not need this anymore? + #if not self.save_dir: + # self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" oneshot_model, tokenizer = run_oneshot_for_e2e_testing( model=self.model, model_class=self.model_class, @@ -151,10 +168,13 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) - if VLLM_PYTHON_ENV.lower() == "same": - logger.info("========== RUNNING vLLM in the same python env ==========") + if self.is_vllm_image: + logger.info("========== To run vLLM with vllm image ==========") else: - logger.info("========== RUNNING vLLM in a separate python env ==========") + if VLLM_PYTHON_ENV.lower() == "same": + logger.info("========== RUNNING vLLM in the same python env ==========") + else: + logger.info("========== RUNNING vLLM in a separate python env ==========") self._run_vllm(logger) @@ -200,20 +220,31 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) run_file_path = os.path.join(test_file_dir, "run_vllm.py") - logger.info("Run vllm in subprocess.Popen() using python env:") + logger.info("Run vllm using python env:") logger.info(self.vllm_env) - result = subprocess.Popen( - [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - stdout, stderr = result.communicate() - logger.info(stdout) + if self.is_vllm_image: + vllm_cmd = " ".join( + "python", run_file_path, f"'{json_scheme}'", + f"'{json_llm_kwargs}'", f"'{json_prompts}'") + with open(self.vllm_cmd_file, "a") as cf: + cf.write(vllm_cmd) + logger.info(f"Wrote vllm cmd into {vllm_cmd_file}:") + logger.info(vllm_cmd) + else: + logger.info("Run vllm in subprocess.Popen using python env:") + logger.info(self.vllm_env) + result = subprocess.Popen( + [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, 
json_prompts], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + stdout, stderr = result.communicate() + logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg def _check_session_contains_recipe(self) -> None: session = active_session() From afbf81197206f8c96e16c54150581be79ccac36f Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 10:29:20 -0400 Subject: [PATCH 02/41] fix a typo Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 507b588ed6..b1c1ab065b 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -32,10 +32,10 @@ "recipe.yaml", "tokenizer.json", ] -IS_VLLM_IMAGE = false +IS_VLLM_IMAGE = False # when using vllm image, needs to save the generated model and vllm command if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exist()): - IS_VLLM_IMAGE = true + IS_VLLM_IMAGE = True assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" # Will run each test case in its own process through run_tests.sh From a55e5c84d29b49b05b51fdcf6f75cafcc5fe1042 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:01:05 -0400 Subject: [PATCH 03/41] fix typo again Signed-off-by: Dan Huang --- tests/e2e/vLLM/run_tests.sh | 1 + tests/e2e/vLLM/test_vllm.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests.sh index 988319da55..1f94cf06a3 100644 --- a/tests/e2e/vLLM/run_tests.sh +++ b/tests/e2e/vLLM/run_tests.sh @@ -24,6 +24,7 @@ elif [ -f "$CONFIG" ]; then echo "Config is provided as a file: $CONFIG" CONFIGS=`cat "$CONFIG"` fi +echo "$CONFIGS" # Parse list of configs. for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index b1c1ab065b..6dcf77b426 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -34,7 +34,7 @@ ] IS_VLLM_IMAGE = False # when using vllm image, needs to save the generated model and vllm command -if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exist()): +if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" 
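
The first three patches introduce a single knob, VLLM_PYTHON_ENV, with three interpretations (current interpreter, separate virtualenv, or vLLM image). As a reference point before the later patches reshape this logic, here is a minimal standalone sketch of the detection as it stands after PATCH 03, assuming only the VLLM_PYTHON_ENV and RUN_SAVE_DIR environment variables introduced above; the helper name resolve_vllm_env is illustrative and not part of any patch:

    # Standalone sketch of the environment-detection logic built up in patches 01-03.
    import os
    import sys
    from pathlib import Path

    def resolve_vllm_env():
        """Return (vllm_env, is_image) for the requested vLLM environment.

        "same"            -> run vLLM with the current interpreter
        an existing path  -> run vLLM with that virtualenv's python
        anything else     -> treat the value as a vLLM image; RUN_SAVE_DIR must be
                             set so the quantized model and the generated vLLM
                             command can be shared with the container.
        """
        vllm_python_env = os.environ.get("VLLM_PYTHON_ENV", "same")
        run_save_dir = os.environ.get("RUN_SAVE_DIR", "none")

        if vllm_python_env.lower() == "same":
            return sys.executable, False
        if Path(vllm_python_env).exists():
            return vllm_python_env, False

        # Not "same" and not an existing path: assume it names a vLLM image.
        assert run_save_dir != "none", "To use vllm image, RUN_SAVE_DIR must be set!"
        return vllm_python_env, True

Later patches in the series refine the image branch further: PATCH 10 splits it into a pulled quay.io image versus an already-deployed runner, and PATCH 30/32 collapse that back to a single kubectl-driven path.
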
From ceee6817c8d390a5342f56bf6e678503ce0cf4fa Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:04:24 -0400 Subject: [PATCH 04/41] fix an issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 6dcf77b426..b41ae7b6c7 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -88,7 +88,7 @@ def set_up(self, test_data_file: str): self.vllm_env = VLLM_PYTHON_ENV if RUN_SAVE_DIR != "none": - assert sd_path.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + assert RUN_SAVE_DIR.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR if not self.save_dir: From bcc7a507b646b54a53c6b45e89bf7f130b161389 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:05:52 -0400 Subject: [PATCH 05/41] fix an issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index b41ae7b6c7..020438f180 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -88,7 +88,7 @@ def set_up(self, test_data_file: str): self.vllm_env = VLLM_PYTHON_ENV if RUN_SAVE_DIR != "none": - assert RUN_SAVE_DIR.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR if not self.save_dir: From 665cd1eb64db76b1308e8b15b50a8472f346e020 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:21:14 -0400 Subject: [PATCH 06/41] fix cmd string Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 020438f180..6c57863cd9 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -224,9 +224,9 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if self.is_vllm_image: - vllm_cmd = " ".join( - "python", run_file_path, f"'{json_scheme}'", - f"'{json_llm_kwargs}'", f"'{json_prompts}'") + cmds = ["python", run_file_path, f"'{json_scheme}'", + f"'{json_llm_kwargs}'", f"'{json_prompts}'"] + vllm_cmd = " ".join(cmds) with open(self.vllm_cmd_file, "a") as cf: cf.write(vllm_cmd) logger.info(f"Wrote vllm cmd into {vllm_cmd_file}:") From 4bf0dc1552c6eccd7146dbbc75d010428d5894b1 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:27:00 -0400 Subject: [PATCH 07/41] fix an issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 6c57863cd9..9a761c2be5 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -220,7 +220,7 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) run_file_path = os.path.join(test_file_dir, "run_vllm.py") - logger.info("Run vllm using python env:") + logger.info("Run vllm using env:") logger.info(self.vllm_env) if self.is_vllm_image: @@ -229,7 +229,7 @@ def _run_vllm(self, logger): vllm_cmd = " ".join(cmds) with open(self.vllm_cmd_file, "a") as cf: cf.write(vllm_cmd) - logger.info(f"Wrote vllm cmd into {vllm_cmd_file}:") + logger.info(f"Wrote vllm cmd into {self.vllm_cmd_file}:") logger.info(vllm_cmd) else: logger.info("Run vllm in subprocess.Popen 
using python env:") From 59cea151e5ced5b98bd93f5509f08e578dacb242 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 16:56:53 -0400 Subject: [PATCH 08/41] add debugging Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9a761c2be5..c570bac384 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -168,6 +168,7 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) + logger.info(f"Before vllm starts, here is self.save_dir: {self.save_dir}") if self.is_vllm_image: logger.info("========== To run vLLM with vllm image ==========") else: @@ -228,7 +229,7 @@ def _run_vllm(self, logger): f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) with open(self.vllm_cmd_file, "a") as cf: - cf.write(vllm_cmd) + cf.write(vllm_cmd + "\n") logger.info(f"Wrote vllm cmd into {self.vllm_cmd_file}:") logger.info(vllm_cmd) else: From be75c8d978fb4bd2a5d99d54fc43d471fe85466a Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Mon, 3 Nov 2025 14:21:10 -0500 Subject: [PATCH 09/41] don't delete run folder if using image Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index c570bac384..7796994c5f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -182,7 +182,7 @@ def test_vllm(self, test_data_file: str): self.tear_down() def tear_down(self): - if self.save_dir is not None and os.path.isdir(self.save_dir): + if not IS_VLLM_IMAGE and self.save_dir is not None and os.path.isdir(self.save_dir): shutil.rmtree(self.save_dir) timer = get_singleton_manager() From 586dcc18357a75f819cc7cbd53e73d91e90f714c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 14:39:50 -0500 Subject: [PATCH 10/41] allow using pulled image or deployed runner Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 66 ++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 7796994c5f..7cc6f37ce0 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -15,15 +15,30 @@ from tests.test_timer.timer_utils import get_singleton_manager, log_time from tests.testing_utils import requires_gpu + +def is_quay_image(url: str) -> bool: + pattern = r"^quay\.io/[a-z0-9][a-z0-9-_]*/[a-z0-9][a-z0-9-_/]*:[\w][\w.-]*$" + return re.match(pattern, url) is not None + HF_MODEL_HUB_NAME = "nm-testing" TEST_DATA_FILE = os.environ.get( "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: image url, same (default), or the path of vllm virtualenv +# vllm environment: image url, deployed runner name, same (default), or the path of vllm virtualenv VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") +IS_VLLM_IMAGE = False +IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") +VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") +# when using vllm image, needs to save the generated model and vllm command +if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): + IS_VLLM_IMAGE = True + if not is_quay_image(VLLM_PYTHON_ENV): + IS_VLLM_IMAGE_DEPLOYED = True + assert RUN_SAVE_DIR != "none", "To use vllm 
image must set RUN_SAVE_DIR too!" + TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" EXPECTED_SAVED_FILES = [ @@ -32,11 +47,6 @@ "recipe.yaml", "tokenizer.json", ] -IS_VLLM_IMAGE = False -# when using vllm image, needs to save the generated model and vllm command -if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): - IS_VLLM_IMAGE = True - assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" # Will run each test case in its own process through run_tests.sh # emulating vLLM CI testing @@ -61,6 +71,7 @@ class TestvLLM: be used for quantization. Otherwise, the recipe will always be used if given. """ # noqa: E501 + def set_up(self, test_data_file: str): eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8")) @@ -81,7 +92,7 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - self.is_vllm_image = IS_VLLM_IMAGE + #self.is_vllm_image = IS_VLLM_IMAGE if VLLM_PYTHON_ENV.lower() == "same": self.vllm_env = sys.executable else: @@ -90,20 +101,19 @@ def set_up(self, test_data_file: str): if RUN_SAVE_DIR != "none": assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR + # RUN_SAVE_DIR overwrites config save_dir + self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") if not self.save_dir: - if RUN_SAVE_DIR != "none": - self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") - else: - self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" + self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" logger.info("========== RUNNING ==============") logger.info(f"model save dir: {self.save_dir}") - # command file to run vllm if using vllm image - if self.is_vllm_image: - self.vllm_cmd_file = os.path.join(RUN_SAVE_DIR, "vllm.cmd") - logger.info(f"vllm cmd file save dir: {self.vllm_cmd_file}") + # script to run vllm if using vllm image + if IS_VLLM_IMAGE: + self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") + logger.info(f"vllm bash save dir: {self.vllm_bash}") self.prompts = [ "The capital of France is", @@ -151,7 +161,8 @@ def test_vllm(self, test_data_file: str): fp.write(recipe_yaml_str) session.reset() - if SKIP_HF_UPLOAD.lower() != "yes": + # if vllm image is used, don't upload + if SKIP_HF_UPLOAD.lower() != "yes" and not IS_VLLM_IMAGE: logger.info("================= UPLOADING TO HUB ======================") stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e" @@ -168,8 +179,7 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) - logger.info(f"Before vllm starts, here is self.save_dir: {self.save_dir}") - if self.is_vllm_image: + if IS_VLLM_IMAGE: logger.info("========== To run vLLM with vllm image ==========") else: if VLLM_PYTHON_ENV.lower() == "same": @@ -182,6 +192,7 @@ def test_vllm(self, test_data_file: str): self.tear_down() def tear_down(self): + # model save_dir is needed for vllm image testing if not IS_VLLM_IMAGE and self.save_dir is not None and os.path.isdir(self.save_dir): shutil.rmtree(self.save_dir) @@ -209,7 +220,10 @@ def _run_vllm(self, logger): import json import subprocess - llm_kwargs = {"model": self.save_dir} + llm_kwargs = {"model": self.save_dir(} + if IS_VLLM_IMAGE: + 
llm_kwargs = {"model": + self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR))} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization @@ -219,20 +233,26 @@ def _run_vllm(self, logger): json_prompts = json.dumps(self.prompts) test_file_dir = os.path.dirname(os.path.abspath(__file__)) - run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm using env:") logger.info(self.vllm_env) - if self.is_vllm_image: + if IS_VLLM_IMAGE: + run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) - with open(self.vllm_cmd_file, "a") as cf: + with open(self.vllm_bash, "w") as cf: + cf.write("#!/bin/bash\n\n") cf.write(vllm_cmd + "\n") - logger.info(f"Wrote vllm cmd into {self.vllm_cmd_file}:") + logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) + if IS_VLLM_IMAGE_DEPLOYED: + logger.info("vllm image is deployed. Run vllm cmd with kubectl.") + else: + logger.info("use vllm image directly. Run vllm cmd with podman.") else: + run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") logger.info(self.vllm_env) result = subprocess.Popen( From c1dde7f2f65b650d80c55b613501c48442377ca4 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 15:05:30 -0500 Subject: [PATCH 11/41] fix a typo Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 7cc6f37ce0..62523e73d4 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -220,7 +220,7 @@ def _run_vllm(self, logger): import json import subprocess - llm_kwargs = {"model": self.save_dir(} + llm_kwargs = {"model": self.save_dir} if IS_VLLM_IMAGE: llm_kwargs = {"model": self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR))} From ae9e526791a036540c3e2337c13de3ee43d94fc5 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 15:06:41 -0500 Subject: [PATCH 12/41] remove extra ) Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 62523e73d4..e4330c6eaa 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -223,7 +223,7 @@ def _run_vllm(self, logger): llm_kwargs = {"model": self.save_dir} if IS_VLLM_IMAGE: llm_kwargs = {"model": - self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR))} + self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization From 80352db9526aa37918419a2328460b44b3628cf4 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:27:48 -0500 Subject: [PATCH 13/41] run vllm with podman Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e4330c6eaa..456a03fa6f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -250,7 +250,21 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. Run vllm cmd with kubectl.") else: - logger.info("use vllm image directly. 
Run vllm cmd with podman.") + logger.info("Run vllm in subprocess.Popen with podman using vllm:") + logger.info(self.vllm_env) + result = subprocess.Popen( + ["podman", "run --rm -it --device nvidia.com/gpu=0", + "--security-opt=label=disable --userns=keep-id:uid=1001", + "--env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash", + "-v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + stdout, stderr = result.communicate() + logger.info(stdout) + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") From 8461d03fab753991dec104bad67716f1c0dffd6c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:30:49 -0500 Subject: [PATCH 14/41] fix error Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 456a03fa6f..ef010dd13a 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -253,10 +253,10 @@ def _run_vllm(self, logger): logger.info("Run vllm in subprocess.Popen with podman using vllm:") logger.info(self.vllm_env) result = subprocess.Popen( - ["podman", "run --rm -it --device nvidia.com/gpu=0", - "--security-opt=label=disable --userns=keep-id:uid=1001", - "--env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash", - "-v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], + ["podman", "run --rm -it --device nvidia.com/gpu=0 + --security-opt=label=disable --userns=keep-id:uid=1001 + --env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash + -v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, From 5704e62ca7246872972184fd4488ff8286d6f249 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:37:12 -0500 Subject: [PATCH 15/41] fix issues Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index ef010dd13a..16097f9f9e 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -242,8 +242,9 @@ def _run_vllm(self, logger): cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) - with open(self.vllm_bash, "w") as cf: + with open(self.vllm_bash, "a") as cf: cf.write("#!/bin/bash\n\n") + cf.write("export VLLM_NO_USAGE_STATS=1\n\n") cf.write(vllm_cmd + "\n") logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) @@ -253,18 +254,22 @@ def _run_vllm(self, logger): logger.info("Run vllm in subprocess.Popen with podman using vllm:") logger.info(self.vllm_env) result = subprocess.Popen( - ["podman", "run --rm -it --device nvidia.com/gpu=0 - --security-opt=label=disable --userns=keep-id:uid=1001 - --env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash - -v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - stdout, stderr = result.communicate() - logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg + [ + "podman", "run", "--rm", + "--device", 
"nvidia.com/gpu=0", + "--security-opt=label=disable", + "--userns=keep-id:uid=1001", + "--entrypoint", self.vllm_bash, + "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + VLLM_PYTHON_ENV, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) + stdout, stderr = result.communicate() + logger.info(stdout) + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") From 098f561e5b4fc53631b5ce25c687c6873ccaeb16 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:40:01 -0500 Subject: [PATCH 16/41] fix path Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 16097f9f9e..d28540f282 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -259,7 +259,8 @@ def _run_vllm(self, logger): "--device", "nvidia.com/gpu=0", "--security-opt=label=disable", "--userns=keep-id:uid=1001", - "--entrypoint", self.vllm_bash, + "--entrypoint", + self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", VLLM_PYTHON_ENV, ], From d56440850efb0432d7b0144f19cc206fdcea5f41 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:53:11 -0500 Subject: [PATCH 17/41] improve output Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index d28540f282..fbf194644f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -242,21 +242,30 @@ def _run_vllm(self, logger): cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) - with open(self.vllm_bash, "a") as cf: - cf.write("#!/bin/bash\n\n") - cf.write("export VLLM_NO_USAGE_STATS=1\n\n") - cf.write(vllm_cmd + "\n") + with open(self.vllm_bash, "w") as cf: + cf.write(f"#!/bin/bash\n\n + export VLLM_NO_USAGE_STATS=1\n\n") + {vllm_cmd}\n") logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. 
Run vllm cmd with kubectl.") else: - logger.info("Run vllm in subprocess.Popen with podman using vllm:") - logger.info(self.vllm_env) + podman_cmd = " ".join("podman", + "run", "--rm", "--device", + "nvidia.com/gpu=all", + "--security-opt=label=disable", + "--userns=keep-id:uid=1001", + "--entrypoint", + self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), + "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + VLLM_PYTHON_ENV) + logger.info("podman command:") + logger.info(podman_cmd) result = subprocess.Popen( [ "podman", "run", "--rm", - "--device", "nvidia.com/gpu=0", + "--device", "nvidia.com/gpu=all", "--security-opt=label=disable", "--userns=keep-id:uid=1001", "--entrypoint", @@ -267,10 +276,6 @@ def _run_vllm(self, logger): stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - stdout, stderr = result.communicate() - logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") @@ -281,11 +286,12 @@ def _run_vllm(self, logger): stderr=subprocess.PIPE, text=True, ) - stdout, stderr = result.communicate() - logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg + stdout, stderr = result.communicate() + logger.info(stdout) + + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg def _check_session_contains_recipe(self) -> None: session = active_session() From 5da7eee819d1bc8753c82f27da67ba436da437a1 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:54:45 -0500 Subject: [PATCH 18/41] fix typo Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fbf194644f..1c0ad550f6 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -244,7 +244,7 @@ def _run_vllm(self, logger): vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: cf.write(f"#!/bin/bash\n\n - export VLLM_NO_USAGE_STATS=1\n\n") + export VLLM_NO_USAGE_STATS=1\n\n {vllm_cmd}\n") logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) From 4cb22517ee35e5031e7b265c3e620a39d9539c58 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:02:18 -0500 Subject: [PATCH 19/41] fix format Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 1c0ad550f6..4a73b1141a 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -243,9 +243,10 @@ def _run_vllm(self, logger): f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: - cf.write(f"#!/bin/bash\n\n - export VLLM_NO_USAGE_STATS=1\n\n - {vllm_cmd}\n") + cf.write(f"""#!/bin/bash + export VLLM_NO_USAGE_STATS=1 + {vllm_cmd} + """) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: From d2cb6464fcbb46652b07bceb8dd262248a12a379 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:08:19 -0500 Subject: [PATCH 20/41] fix command Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff 
--git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 4a73b1141a..0fd8075b1b 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -252,15 +252,14 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. Run vllm cmd with kubectl.") else: - podman_cmd = " ".join("podman", - "run", "--rm", "--device", - "nvidia.com/gpu=all", + cmds = ["podman run --rm --device nvidia.com/gpu=all", "--security-opt=label=disable", "--userns=keep-id:uid=1001", "--entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", - VLLM_PYTHON_ENV) + VLLM_PYTHON_ENV] + podman_cmd = " ".join(cmds) logger.info("podman command:") logger.info(podman_cmd) result = subprocess.Popen( From 5cdb54306229ae9b77dfc11b6ee902a5bb7efe9c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:11:26 -0500 Subject: [PATCH 21/41] allow file to execute Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 0fd8075b1b..5911584919 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -247,6 +247,7 @@ def _run_vllm(self, logger): export VLLM_NO_USAGE_STATS=1 {vllm_cmd} """) + os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: From 6dc42c41962820f0529b2616c75c8ecad6c3c0f6 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:14:01 -0500 Subject: [PATCH 22/41] minor update Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 5911584919..d4607b89d6 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -244,6 +244,7 @@ def _run_vllm(self, logger): vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: cf.write(f"""#!/bin/bash + export HF_HUB_OFFLINE=0 export VLLM_NO_USAGE_STATS=1 {vllm_cmd} """) From 84634e0de77ee1e7db693c9f82cbfd97087bc235 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:23:40 -0500 Subject: [PATCH 23/41] copy file Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index d4607b89d6..c891c53841 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -239,6 +239,7 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") + shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), run_file_path) cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) @@ -268,8 +269,6 @@ def _run_vllm(self, logger): [ "podman", "run", "--rm", "--device", "nvidia.com/gpu=all", - "--security-opt=label=disable", - "--userns=keep-id:uid=1001", "--entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", From 57c99acf17e4e81d357e93d8edb74633944c266e Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:26:40 -0500 Subject: [PATCH 24/41] fix issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index c891c53841..b7fc6c2681 
100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -239,7 +239,8 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") - shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), run_file_path) + shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), + os.path.join(RUN_SAVE_DIR, "run_vllm.py")) cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) From 7cdedbbd5c2b842560c76d0e8f8ed03c511544a1 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 7 Nov 2025 13:57:54 -0500 Subject: [PATCH 25/41] run vllm in deployed pod Signed-off-by: Dan Huang --- tests/e2e/vLLM/e2e-smoke.list | 5 ----- tests/e2e/vLLM/lmeval-smoke.list | 6 ++++++ tests/e2e/vLLM/test_vllm.py | 24 ++++++++++++++++-------- 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 tests/e2e/vLLM/lmeval-smoke.list diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/e2e-smoke.list index e7f42d4a1e..9737d8d37a 100644 --- a/tests/e2e/vLLM/e2e-smoke.list +++ b/tests/e2e/vLLM/e2e-smoke.list @@ -1,6 +1 @@ fp8_dynamic_per_token.yaml -kv_cache_gptq_tinyllama.yaml -sparse2of4_fp8_dynamic.yaml -w4a16_grouped_quant_asym_awq.yaml -w4a16_actorder_weight.yaml -int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/lmeval-smoke.list b/tests/e2e/vLLM/lmeval-smoke.list new file mode 100644 index 0000000000..e7f42d4a1e --- /dev/null +++ b/tests/e2e/vLLM/lmeval-smoke.list @@ -0,0 +1,6 @@ +fp8_dynamic_per_token.yaml +kv_cache_gptq_tinyllama.yaml +sparse2of4_fp8_dynamic.yaml +w4a16_grouped_quant_asym_awq.yaml +w4a16_actorder_weight.yaml +int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index b7fc6c2681..231e9120d0 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -255,22 +255,30 @@ def _run_vllm(self, logger): logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. 
Run vllm cmd with kubectl.") + cmds = [f"kubectl exec -it VLLM_PYTHON_ENV -n arc-runners", + f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"] + kubectl_cmd = " ".join(cmds) + logger.info(f"kubectl command: {kubectl_cmd}") + result = subprocess.Popen( + [ + "kubectl", "exec", "-it", + VLLM_PYTHON_ENV, "-n arc-runners", + "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash" + ] + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) else: - cmds = ["podman run --rm --device nvidia.com/gpu=all", - "--security-opt=label=disable", - "--userns=keep-id:uid=1001", - "--entrypoint", + cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", VLLM_PYTHON_ENV] podman_cmd = " ".join(cmds) - logger.info("podman command:") - logger.info(podman_cmd) + logger.info(f"podman command: {podman_cmd}") result = subprocess.Popen( [ "podman", "run", "--rm", - "--device", "nvidia.com/gpu=all", - "--entrypoint", + "--device", "nvidia.com/gpu=all", "--entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", VLLM_PYTHON_ENV, From 3951475283aa80e946dc18c56bef82bbc4f6e97c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 7 Nov 2025 15:15:32 -0500 Subject: [PATCH 26/41] missed , Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 231e9120d0..e774913ffb 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -255,7 +255,7 @@ def _run_vllm(self, logger): logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. 
Run vllm cmd with kubectl.") - cmds = [f"kubectl exec -it VLLM_PYTHON_ENV -n arc-runners", + cmds = [f"kubectl exec -it {VLLM_PYTHON_ENV} -n arc-runners", f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"] kubectl_cmd = " ".join(cmds) logger.info(f"kubectl command: {kubectl_cmd}") @@ -263,8 +263,8 @@ def _run_vllm(self, logger): [ "kubectl", "exec", "-it", VLLM_PYTHON_ENV, "-n arc-runners", - "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash" - ] + "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) From 5c401fcba15abf6adda43d19966bf2a2a8f2aff6 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 7 Nov 2025 16:06:35 -0500 Subject: [PATCH 27/41] fix command Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e774913ffb..fb3e406310 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -262,8 +262,8 @@ def _run_vllm(self, logger): result = subprocess.Popen( [ "kubectl", "exec", "-it", - VLLM_PYTHON_ENV, "-n arc-runners", - "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", + VLLM_PYTHON_ENV, "-n", "arc-runners", + "--", "/bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, From 870b6ee07810d890bbdbca20c619bf9b13de877d Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 11 Nov 2025 11:54:51 -0500 Subject: [PATCH 28/41] remove VLLM_VOLUME_MOUNT_DIR Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fb3e406310..fb22235b56 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -31,7 +31,7 @@ def is_quay_image(url: str) -> bool: IS_VLLM_IMAGE = False IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") -VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") +#VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") # when using vllm image, needs to save the generated model and vllm command if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True @@ -221,9 +221,9 @@ def _run_vllm(self, logger): import subprocess llm_kwargs = {"model": self.save_dir} - if IS_VLLM_IMAGE: - llm_kwargs = {"model": - self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} + #if IS_VLLM_IMAGE: + # llm_kwargs = {"model": + # self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization @@ -238,7 +238,7 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if IS_VLLM_IMAGE: - run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") + #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) cmds = ["python", run_file_path, f"'{json_scheme}'", @@ -270,8 +270,10 @@ def _run_vllm(self, logger): text=True) else: cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint", - self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), + self.vllm_bash, + "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + 
f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", VLLM_PYTHON_ENV] podman_cmd = " ".join(cmds) logger.info(f"podman command: {podman_cmd}") @@ -279,8 +281,10 @@ def _run_vllm(self, logger): [ "podman", "run", "--rm", "--device", "nvidia.com/gpu=all", "--entrypoint", - self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), + self.vllm_bash, + "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", VLLM_PYTHON_ENV, ], stdout=subprocess.PIPE, From d23bdf4ba7d1cd734c02142456729a6ffa3b1ddd Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 11 Nov 2025 12:17:10 -0500 Subject: [PATCH 29/41] fix missing path Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fb22235b56..bacff76f20 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -239,6 +239,7 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") + run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) cmds = ["python", run_file_path, f"'{json_scheme}'", From 625c9db1bdfbb11143f4114a628302ab3f31bb74 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 11 Nov 2025 13:23:53 -0500 Subject: [PATCH 30/41] clean up Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index bacff76f20..fdcd8d4238 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -31,13 +31,12 @@ def is_quay_image(url: str) -> bool: IS_VLLM_IMAGE = False IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") -#VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") -# when using vllm image, needs to save the generated model and vllm command +# when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True if not is_quay_image(VLLM_PYTHON_ENV): IS_VLLM_IMAGE_DEPLOYED = True - assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" + assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" 
TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -92,7 +91,6 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - #self.is_vllm_image = IS_VLLM_IMAGE if VLLM_PYTHON_ENV.lower() == "same": self.vllm_env = sys.executable else: @@ -101,7 +99,7 @@ def set_up(self, test_data_file: str): if RUN_SAVE_DIR != "none": assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR - # RUN_SAVE_DIR overwrites config save_dir + # RUN_SAVE_DIR overwrites config save_dir if specified self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") if not self.save_dir: @@ -112,6 +110,7 @@ def set_up(self, test_data_file: str): # script to run vllm if using vllm image if IS_VLLM_IMAGE: + # script file containing vllm commands to run in the image self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") logger.info(f"vllm bash save dir: {self.vllm_bash}") @@ -126,9 +125,6 @@ def test_vllm(self, test_data_file: str): # Run vLLM with saved model self.set_up(test_data_file) - # not need this anymore? - #if not self.save_dir: - # self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" oneshot_model, tokenizer = run_oneshot_for_e2e_testing( model=self.model, model_class=self.model_class, @@ -221,9 +217,6 @@ def _run_vllm(self, logger): import subprocess llm_kwargs = {"model": self.save_dir} - #if IS_VLLM_IMAGE: - # llm_kwargs = {"model": - # self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization @@ -238,7 +231,6 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if IS_VLLM_IMAGE: - #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) @@ -253,38 +245,24 @@ def _run_vllm(self, logger): """) os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") - logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. Run vllm cmd with kubectl.") - cmds = [f"kubectl exec -it {VLLM_PYTHON_ENV} -n arc-runners", - f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"] - kubectl_cmd = " ".join(cmds) - logger.info(f"kubectl command: {kubectl_cmd}") result = subprocess.Popen( [ "kubectl", "exec", "-it", VLLM_PYTHON_ENV, "-n", "arc-runners", - "--", "/bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", + "--", "/bin/bash", self.vllm_bash, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) else: - cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint", - #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - self.vllm_bash, - "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", - f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", - VLLM_PYTHON_ENV] - podman_cmd = " ".join(cmds) - logger.info(f"podman command: {podman_cmd}") + logger.info("vllm image is pulled. 
Run vllm cmd with podman.") result = subprocess.Popen( [ "podman", "run", "--rm", "--device", "nvidia.com/gpu=all", "--entrypoint", - #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - self.vllm_bash, - "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + self.vllm_bash, "-v", f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", VLLM_PYTHON_ENV, ], From 264fdcb206faa5ddfa898570ab99120799220829 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Thu, 13 Nov 2025 14:05:02 -0500 Subject: [PATCH 31/41] final update Signed-off-by: Dan Huang --- tests/e2e/vLLM/e2e-smoke.list | 6 ++++++ tests/e2e/vLLM/lmeval-smoke.list | 6 ------ tests/e2e/vLLM/test_vllm.py | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) delete mode 100644 tests/e2e/vLLM/lmeval-smoke.list diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/e2e-smoke.list index 9737d8d37a..d66618aaec 100644 --- a/tests/e2e/vLLM/e2e-smoke.list +++ b/tests/e2e/vLLM/e2e-smoke.list @@ -1 +1,7 @@ +fp4_nvfp4.yaml fp8_dynamic_per_token.yaml +kv_cache_gptq_tinyllama.yaml +sparse2of4_fp8_dynamic.yaml +w4a16_grouped_quant_asym_awq.yaml +w4a16_actorder_weight.yaml +int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/lmeval-smoke.list b/tests/e2e/vLLM/lmeval-smoke.list deleted file mode 100644 index e7f42d4a1e..0000000000 --- a/tests/e2e/vLLM/lmeval-smoke.list +++ /dev/null @@ -1,6 +0,0 @@ -fp8_dynamic_per_token.yaml -kv_cache_gptq_tinyllama.yaml -sparse2of4_fp8_dynamic.yaml -w4a16_grouped_quant_asym_awq.yaml -w4a16_actorder_weight.yaml -int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fdcd8d4238..9ca1c77ef3 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -26,7 +26,7 @@ def is_quay_image(url: str) -> bool: "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: image url, deployed runner name, same (default), or the path of vllm virtualenv +# vllm environment: same (default), the path of vllm virtualenv, image url, deployed runner name VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False IS_VLLM_IMAGE_DEPLOYED=False @@ -231,6 +231,7 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if IS_VLLM_IMAGE: + # generate python command to run in the vllm image run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) @@ -257,7 +258,7 @@ def _run_vllm(self, logger): stderr=subprocess.PIPE, text=True) else: - logger.info("vllm image is pulled. Run vllm cmd with podman.") + logger.info("vllm image is pulled locally. 
Run vllm cmd with podman.") result = subprocess.Popen( [ "podman", "run", "--rm", From 318bd3da6629802c06e66184d317939340728727 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Thu, 13 Nov 2025 17:37:47 -0500 Subject: [PATCH 32/41] clean up Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 45 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9ca1c77ef3..501fe4510f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -16,27 +16,20 @@ from tests.testing_utils import requires_gpu -def is_quay_image(url: str) -> bool: - pattern = r"^quay\.io/[a-z0-9][a-z0-9-_]*/[a-z0-9][a-z0-9-_/]*:[\w][\w.-]*$" - return re.match(pattern, url) is not None - HF_MODEL_HUB_NAME = "nm-testing" TEST_DATA_FILE = os.environ.get( "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: same (default), the path of vllm virtualenv, image url, deployed runner name +# vllm environment: same (default), the path of vllm virtualenv, deployed runner name VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False -IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") # when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True - if not is_quay_image(VLLM_PYTHON_ENV): - IS_VLLM_IMAGE_DEPLOYED = True - assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" + assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -246,30 +239,16 @@ def _run_vllm(self, logger): """) os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") - if IS_VLLM_IMAGE_DEPLOYED: - logger.info("vllm image is deployed. Run vllm cmd with kubectl.") - result = subprocess.Popen( - [ - "kubectl", "exec", "-it", - VLLM_PYTHON_ENV, "-n", "arc-runners", - "--", "/bin/bash", self.vllm_bash, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) - else: - logger.info("vllm image is pulled locally. Run vllm cmd with podman.") - result = subprocess.Popen( - [ - "podman", "run", "--rm", - "--device", "nvidia.com/gpu=all", "--entrypoint", - self.vllm_bash, "-v", - f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", - VLLM_PYTHON_ENV, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + logger.info("vllm image. 
Run vllm cmd with kubectl.") + result = subprocess.Popen( + [ + "kubectl", "exec", "-it", + VLLM_PYTHON_ENV, "-n", "arc-runners", + "--", "/bin/bash", self.vllm_bash, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") From 117ec9d9da2183d8949350debf6cd106ec379577 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 14 Nov 2025 15:54:23 -0500 Subject: [PATCH 33/41] fix quality failures Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 67 +++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 501fe4510f..1a93c5362e 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -15,7 +15,6 @@ from tests.test_timer.timer_utils import get_singleton_manager, log_time from tests.testing_utils import requires_gpu - HF_MODEL_HUB_NAME = "nm-testing" TEST_DATA_FILE = os.environ.get( @@ -25,7 +24,7 @@ # vllm environment: same (default), the path of vllm virtualenv, deployed runner name VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False -RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") +RUN_SAVE_DIR = os.environ.get("RUN_SAVE_DIR", "none") # when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True @@ -40,6 +39,7 @@ "tokenizer.json", ] + # Will run each test case in its own process through run_tests.sh # emulating vLLM CI testing @requires_gpu(1) @@ -63,7 +63,6 @@ class TestvLLM: be used for quantization. Otherwise, the recipe will always be used if given. """ # noqa: E501 - def set_up(self, test_data_file: str): eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8")) @@ -90,10 +89,14 @@ def set_up(self, test_data_file: str): self.vllm_env = VLLM_PYTHON_ENV if RUN_SAVE_DIR != "none": - assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + assert Path( + RUN_SAVE_DIR + ).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR # RUN_SAVE_DIR overwrites config save_dir if specified - self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") + self.save_dir = os.path.join( + RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}" + ) if not self.save_dir: self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" @@ -174,7 +177,9 @@ def test_vllm(self, test_data_file: str): if VLLM_PYTHON_ENV.lower() == "same": logger.info("========== RUNNING vLLM in the same python env ==========") else: - logger.info("========== RUNNING vLLM in a separate python env ==========") + logger.info( + "========== RUNNING vLLM in a separate python env ==========" + ) self._run_vllm(logger) @@ -182,7 +187,11 @@ def test_vllm(self, test_data_file: str): def tear_down(self): # model save_dir is needed for vllm image testing - if not IS_VLLM_IMAGE and self.save_dir is not None and os.path.isdir(self.save_dir): + if ( + not IS_VLLM_IMAGE + and self.save_dir is not None + and os.path.isdir(self.save_dir) + ): shutil.rmtree(self.save_dir) timer = get_singleton_manager() @@ -226,35 +235,57 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: # generate python command to run in the vllm image run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") - shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), 
- os.path.join(RUN_SAVE_DIR, "run_vllm.py")) - cmds = ["python", run_file_path, f"'{json_scheme}'", - f"'{json_llm_kwargs}'", f"'{json_prompts}'"] + shutil.copy( + os.path.join(test_file_dir, "run_vllm.py"), + os.path.join(RUN_SAVE_DIR, "run_vllm.py"), + ) + cmds = [ + "python", + run_file_path, + f"'{json_scheme}'", + f"'{json_llm_kwargs}'", + f"'{json_prompts}'", + ] vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: - cf.write(f"""#!/bin/bash + cf.write( + f"""#!/bin/bash export HF_HUB_OFFLINE=0 export VLLM_NO_USAGE_STATS=1 {vllm_cmd} - """) + """ + ) os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info("vllm image. Run vllm cmd with kubectl.") result = subprocess.Popen( [ - "kubectl", "exec", "-it", - VLLM_PYTHON_ENV, "-n", "arc-runners", - "--", "/bin/bash", self.vllm_bash, + "kubectl", + "exec", + "-it", + VLLM_PYTHON_ENV, + "-n", + "arc-runners", + "--", + "/bin/bash", + self.vllm_bash, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - text=True) + text=True, + ) else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") logger.info(self.vllm_env) result = subprocess.Popen( - [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts], + [ + self.vllm_env, + run_file_path, + json_scheme, + json_llm_kwargs, + json_prompts, + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, From 8b41d5f0991aeaf7223d4460917aea3fbc01ec85 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Mon, 24 Nov 2025 16:59:06 -0500 Subject: [PATCH 34/41] reorg test code and remove env var Signed-off-by: Dan Huang --- tests/e2e/vLLM/run_tests.sh | 12 +--- tests/e2e/vLLM/run_tests_in_rhaiis.sh | 81 +++++++++++++++++++++++++++ tests/e2e/vLLM/test_vllm.py | 79 ++++++++++---------------- 3 files changed, 112 insertions(+), 60 deletions(-) create mode 100644 tests/e2e/vLLM/run_tests_in_rhaiis.sh diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests.sh index 1f94cf06a3..9a2fcec9cd 100644 --- a/tests/e2e/vLLM/run_tests.sh +++ b/tests/e2e/vLLM/run_tests.sh @@ -16,18 +16,8 @@ while getopts "c:t:" OPT; do esac done -script_path=$(dirname "${BASH_SOURCE[0]}") -if [ -d "$CONFIG" ]; then - echo "Config is provided as a folder: $CONFIG" - CONFIGS=`ls "$CONFIG"` -elif [ -f "$CONFIG" ]; then - echo "Config is provided as a file: $CONFIG" - CONFIGS=`cat "$CONFIG"` -fi -echo "$CONFIGS" - # Parse list of configs. -for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") +for MODEL_CONFIG in "$CONFIG"/* do LOCAL_SUCCESS=0 diff --git a/tests/e2e/vLLM/run_tests_in_rhaiis.sh b/tests/e2e/vLLM/run_tests_in_rhaiis.sh new file mode 100644 index 0000000000..2c1867f428 --- /dev/null +++ b/tests/e2e/vLLM/run_tests_in_rhaiis.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +usage() { + echo "Usage: $0 -c -t -s " + exit 1 +} + +while getopts "c:t:s:" OPT; do + case ${OPT} in + c ) + CONFIG="$OPTARG" + ;; + t ) + TEST="$OPTARG" + ;; + s ) + SAVE_DIR="$OPTARG" + ;; + \? ) + exit 1 + ;; + esac +done + +if [[ -z "$CONFIG" || -z "$TEST" || -z "$SAVE_DIR" ]]; then + echo "Error: -c, -t, and -s are required." 
+ usage +fi + +script_path=$(dirname "${BASH_SOURCE[0]}") +if [ -d "$CONFIG" ]; then + echo "Config is provided as a folder: $CONFIG" + CONFIGS=`ls "$CONFIG"` +elif [ -f "$CONFIG" ]; then + echo "Config is provided as a file: $CONFIG" + CONFIGS=`cat "$CONFIG"` +fi + +SUCCESS=0 + +# Parse list of configs and add save_dir +rm -rf $SAVE_DIR/configs +mkdir -p $SAVE_DIR/configs +for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") +do + FILE_NAME=$(basename $MODEL_CONFIG) + CONFIG_FILE=$SAVE_DIR/configs/$FILE_NAME + + save_dir=$(cat $MODEL_CONFIG | grep 'save_dir:' | cut -d' ' -f2) + model=$(cat $MODEL_CONFIG | grep 'model:' | cut -d'/' -f2) + scheme=$(cat $MODEL_CONFIG | grep 'scheme:' | cut -d' ' -f2) + + # add or overwrite save_dir for each model + if [[ -z "$save_dir" ]]; then + { cat $MODEL_CONFIG; echo -e "\nsave_dir: $SAVE_DIR/$model-$scheme"; } > $CONFIG_FILE + else + { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE + fi + + #{ cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR"; } > $CONFIG_FILE + + echo "=== RUNNING MODEL: $CONFIG_FILE ===" + cat $CONFIG_FILE + + LOCAL_SUCCESS=0 + export TEST_DATA_FILE="$CONFIG_FILE" + pytest \ + --capture=tee-sys \ + "$TEST" || LOCAL_SUCCESS=$? + + if [[ $LOCAL_SUCCESS == 0 ]]; then + echo "=== PASSED MODEL: $CONFIG_FILE ===" + else + echo "=== FAILED MODEL: $CONFIG_FILE ===" + fi + + SUCCESS=$((SUCCESS + LOCAL_SUCCESS)) + +done + +exit "$SUCCESS" diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 1a93c5362e..e432a5f001 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -1,3 +1,4 @@ + import os import re import shutil @@ -21,15 +22,11 @@ "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: same (default), the path of vllm virtualenv, deployed runner name +# vllm python environment VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False -RUN_SAVE_DIR = os.environ.get("RUN_SAVE_DIR", "none") -# when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True - assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" 
- TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" EXPECTED_SAVED_FILES = [ @@ -83,32 +80,18 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - if VLLM_PYTHON_ENV.lower() == "same": - self.vllm_env = sys.executable - else: + # vllm python env - if same, use the current python env, otherwise use + # the python passed in VLLM_PYTHON_ENV + if VLLM_PYTHON_ENV.lower() != "same": self.vllm_env = VLLM_PYTHON_ENV - - if RUN_SAVE_DIR != "none": - assert Path( - RUN_SAVE_DIR - ).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" - self.run_save_dir = RUN_SAVE_DIR - # RUN_SAVE_DIR overwrites config save_dir if specified - self.save_dir = os.path.join( - RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}" - ) + else: + self.vllm_env = sys.executable if not self.save_dir: self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" logger.info("========== RUNNING ==============") - logger.info(f"model save dir: {self.save_dir}") - - # script to run vllm if using vllm image - if IS_VLLM_IMAGE: - # script file containing vllm commands to run in the image - self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") - logger.info(f"vllm bash save dir: {self.vllm_bash}") + logger.info(self.save_dir) self.prompts = [ "The capital of France is", @@ -117,9 +100,7 @@ def set_up(self, test_data_file: str): ] self.api = HfApi() - def test_vllm(self, test_data_file: str): - # Run vLLM with saved model - + def compress_model(self, test_data_file: str): self.set_up(test_data_file) oneshot_model, tokenizer = run_oneshot_for_e2e_testing( model=self.model, @@ -133,12 +114,16 @@ def test_vllm(self, test_data_file: str): recipe=self.recipe, quant_type=self.quant_type, ) + self.oneshot_model = oneshot_model + self.tokenizer = tokenizer # check that session contains recipe self._check_session_contains_recipe() + def save_compressed_model(self): + logger.info("================= SAVING TO DISK ======================") - self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer) + self._save_compressed_model(oneshot_model=self.oneshot_model, tokenizer=self.tokenizer) recipe_path = os.path.join(self.save_dir, "recipe.yaml") @@ -153,8 +138,7 @@ def test_vllm(self, test_data_file: str): fp.write(recipe_yaml_str) session.reset() - # if vllm image is used, don't upload - if SKIP_HF_UPLOAD.lower() != "yes" and not IS_VLLM_IMAGE: + if SKIP_HF_UPLOAD.lower() != "yes": logger.info("================= UPLOADING TO HUB ======================") stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e" @@ -171,27 +155,22 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) + def test_vllm(self): + # Run vLLM with saved model + if IS_VLLM_IMAGE: - logger.info("========== To run vLLM with vllm image ==========") + logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========") + elif VLLM_PYTHON_ENV.lower() == "same": + logger.info("========== RUNNING vLLM in the same python env ==========") else: - if VLLM_PYTHON_ENV.lower() == "same": - logger.info("========== RUNNING vLLM in the same python env ==========") - else: - logger.info( - "========== RUNNING vLLM in a separate python env ==========" - ) + logger.info("========== RUNNING vLLM in a separate python env ==========") self._run_vllm(logger) self.tear_down() def 
tear_down(self): - # model save_dir is needed for vllm image testing - if ( - not IS_VLLM_IMAGE - and self.save_dir is not None - and os.path.isdir(self.save_dir) - ): + if self.save_dir is not None and os.path.isdir(self.save_dir): shutil.rmtree(self.save_dir) timer = get_singleton_manager() @@ -229,11 +208,12 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) - logger.info("Run vllm using env:") + logger.info("Run vllm in subprocess.Popen() using python env:") logger.info(self.vllm_env) if IS_VLLM_IMAGE: # generate python command to run in the vllm image + RUN_SAVE_DIR = os.path.dirname(self.save_dir) run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy( os.path.join(test_file_dir, "run_vllm.py"), @@ -247,7 +227,8 @@ def _run_vllm(self, logger): f"'{json_prompts}'", ] vllm_cmd = " ".join(cmds) - with open(self.vllm_bash, "w") as cf: + vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") + with open(vllm_bash, "w") as cf: cf.write( f"""#!/bin/bash export HF_HUB_OFFLINE=0 @@ -255,8 +236,8 @@ def _run_vllm(self, logger): {vllm_cmd} """ ) - os.chmod(self.vllm_bash, 0o755) - logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") + os.chmod(vllm_bash, 0o755) + logger.info(f"Wrote vllm cmd into {vllm_bash}:") logger.info("vllm image. Run vllm cmd with kubectl.") result = subprocess.Popen( [ @@ -268,7 +249,7 @@ def _run_vllm(self, logger): "arc-runners", "--", "/bin/bash", - self.vllm_bash, + vllm_bash, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, From 1b2530e9612ae458b0eb5907bd6df179acd66c94 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 09:42:22 -0500 Subject: [PATCH 35/41] fix error Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e432a5f001..9b8f6027cd 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -155,9 +155,15 @@ def save_compressed_model(self): folder_path=self.save_dir, ) - def test_vllm(self): - # Run vLLM with saved model + def test_vllm(self, test_data_file: str): + + self.set_up(self, test_data_file) + + self.compress_model(self, test_data_file) + self.save_compressed_model(self) + + # Run vLLM with saved model if IS_VLLM_IMAGE: logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========") elif VLLM_PYTHON_ENV.lower() == "same": From 3d889c6838c49d75fe2bbae7870d551e3c7f7c4c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 10:42:05 -0500 Subject: [PATCH 36/41] fix another error Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9b8f6027cd..e13878ef89 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -157,11 +157,11 @@ def save_compressed_model(self): def test_vllm(self, test_data_file: str): - self.set_up(self, test_data_file) + self.set_up(test_data_file) - self.compress_model(self, test_data_file) + self.compress_model(test_data_file) - self.save_compressed_model(self) + self.save_compressed_model() # Run vLLM with saved model if IS_VLLM_IMAGE: From 7e772026143a3ee848fd1d022897954b970d77be Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 11:30:35 -0500 Subject: [PATCH 37/41] fix style Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e13878ef89..5b963a641b 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -123,7 +123,8 @@ def compress_model(self, test_data_file: str): def save_compressed_model(self): logger.info("================= SAVING TO DISK ======================") - self._save_compressed_model(oneshot_model=self.oneshot_model, tokenizer=self.tokenizer) + self._save_compressed_model(oneshot_model=self.oneshot_model, + tokenizer=self.tokenizer) recipe_path = os.path.join(self.save_dir, "recipe.yaml") From 7662699bd6dfb565d9227752ea54fbd8face281a Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 11:46:53 -0500 Subject: [PATCH 38/41] clean up and fix format Signed-off-by: Dan Huang --- tests/e2e/vLLM/run_tests_in_rhaiis.sh | 2 -- tests/e2e/vLLM/test_vllm.py | 12 +++--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/e2e/vLLM/run_tests_in_rhaiis.sh b/tests/e2e/vLLM/run_tests_in_rhaiis.sh index 2c1867f428..6f30028f21 100644 --- a/tests/e2e/vLLM/run_tests_in_rhaiis.sh +++ b/tests/e2e/vLLM/run_tests_in_rhaiis.sh @@ -57,8 +57,6 @@ do { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE fi - #{ cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR"; } > $CONFIG_FILE - echo "=== RUNNING MODEL: $CONFIG_FILE ===" cat $CONFIG_FILE diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 5b963a641b..d9937312ca 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -1,4 +1,3 @@ - import os import re import shutil @@ -121,10 +120,10 @@ def compress_model(self, test_data_file: str): self._check_session_contains_recipe() def save_compressed_model(self): - logger.info("================= SAVING TO DISK ======================") - self._save_compressed_model(oneshot_model=self.oneshot_model, - tokenizer=self.tokenizer) + self._save_compressed_model( + oneshot_model=self.oneshot_model, tokenizer=self.tokenizer + ) recipe_path = os.path.join(self.save_dir, "recipe.yaml") @@ -158,8 +157,6 @@ def save_compressed_model(self): def test_vllm(self, test_data_file: str): - self.set_up(test_data_file) - self.compress_model(test_data_file) self.save_compressed_model() @@ -215,9 +212,6 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) - logger.info("Run vllm in subprocess.Popen() using python env:") - logger.info(self.vllm_env) - if IS_VLLM_IMAGE: # generate python command to run in the vllm image RUN_SAVE_DIR = os.path.dirname(self.save_dir) From abb6bab271ee067f8f1fb2599cfd246c1b353bcb Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 11:54:31 -0500 Subject: [PATCH 39/41] fix format Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index d9937312ca..9e19a1ef19 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -156,7 +156,6 @@ def save_compressed_model(self): ) def test_vllm(self, test_data_file: str): - self.compress_model(test_data_file) self.save_compressed_model() From de58b026a50c88d97b8a9089c0485daa6996c17f Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 12:46:39 -0500 Subject: [PATCH 40/41] rename file to be rhaiis specific Signed-off-by: Dan Huang --- tests/e2e/vLLM/{e2e-smoke.list => rhaiis-e2e-smoke.list} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/e2e/vLLM/{e2e-smoke.list => 
rhaiis-e2e-smoke.list} (100%) diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/rhaiis-e2e-smoke.list similarity index 100% rename from tests/e2e/vLLM/e2e-smoke.list rename to tests/e2e/vLLM/rhaiis-e2e-smoke.list From 984a8cc775aace5606ff18a754197b971dc61221 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 2 Dec 2025 14:31:59 -0500 Subject: [PATCH 41/41] rename run_tests.sh to run_tests_in_python.sh Signed-off-by: Dan Huang --- tests/e2e/vLLM/{run_tests.sh => run_tests_in_python.sh} | 0 tests/e2e/vLLM/test_vllm.py | 2 +- tests/lmeval/test_lmeval.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename tests/e2e/vLLM/{run_tests.sh => run_tests_in_python.sh} (100%) diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests_in_python.sh similarity index 100% rename from tests/e2e/vLLM/run_tests.sh rename to tests/e2e/vLLM/run_tests_in_python.sh diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9e19a1ef19..066affc69e 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -36,7 +36,7 @@ ] -# Will run each test case in its own process through run_tests.sh +# Will run each test case in its own process through run_tests_in_python.sh # emulating vLLM CI testing @requires_gpu(1) @pytest.mark.parametrize( diff --git a/tests/lmeval/test_lmeval.py b/tests/lmeval/test_lmeval.py index a44cd042ff..662e9bb59a 100644 --- a/tests/lmeval/test_lmeval.py +++ b/tests/lmeval/test_lmeval.py @@ -44,7 +44,7 @@ class LmEvalConfig(BaseModel): TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/lm-eval") -# Will run each test case in its own process through run_tests.sh +# Will run each test case in its own process through run_tests_in_python.sh # emulating vLLM CI testing @requires_gpu(1) @pytest.mark.parametrize(
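
Usage sketch (not part of the patches above): with this series applied, the renamed rhaiis-e2e-smoke.list can be driven through the new run_tests_in_rhaiis.sh roughly as follows. Only the -c/-t/-s flags, the VLLM_PYTHON_ENV variable, and the file paths come from the scripts in this series; the runner pod name and the shared save directory below are illustrative assumptions, not values taken from the patches.

# Minimal sketch, assuming the series is applied and a vLLM-image runner pod
# is reachable via kubectl (the pod name below is hypothetical; setting it to
# "same" would instead run vLLM in the current python environment).
export VLLM_PYTHON_ENV=my-vllm-runner-pod
bash tests/e2e/vLLM/run_tests_in_rhaiis.sh \
    -c tests/e2e/vLLM/rhaiis-e2e-smoke.list \
    -t tests/e2e/vLLM/test_vllm.py \
    -s /mnt/shared/e2e-runs   # hypothetical directory; must be visible to both pytest and the pod

The script rewrites each config with a save_dir under the -s directory, exports it as TEST_DATA_FILE, and runs pytest per config, so the chosen save directory has to be one the kubectl-exec'd vLLM image can also read.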