From f636e2ca3d09d0ac3712a2bb4aed21f549eb7f4a Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 29 Oct 2025 17:20:59 -0400 Subject: [PATCH 01/41] Allow using vllm image Signed-off-by: Dan Huang --- tests/e2e/vLLM/e2e-smoke.list | 6 +++ tests/e2e/vLLM/run_tests.sh | 11 ++++- tests/e2e/vLLM/test_vllm.py | 81 ++++++++++++++++++++++++----------- 3 files changed, 72 insertions(+), 26 deletions(-) create mode 100644 tests/e2e/vLLM/e2e-smoke.list diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/e2e-smoke.list new file mode 100644 index 0000000000..e7f42d4a1e --- /dev/null +++ b/tests/e2e/vLLM/e2e-smoke.list @@ -0,0 +1,6 @@ +fp8_dynamic_per_token.yaml +kv_cache_gptq_tinyllama.yaml +sparse2of4_fp8_dynamic.yaml +w4a16_grouped_quant_asym_awq.yaml +w4a16_actorder_weight.yaml +int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests.sh index 9a2fcec9cd..988319da55 100644 --- a/tests/e2e/vLLM/run_tests.sh +++ b/tests/e2e/vLLM/run_tests.sh @@ -16,8 +16,17 @@ while getopts "c:t:" OPT; do esac done +script_path=$(dirname "${BASH_SOURCE[0]}") +if [ -d "$CONFIG" ]; then + echo "Config is provided as a folder: $CONFIG" + CONFIGS=`ls "$CONFIG"` +elif [ -f "$CONFIG" ]; then + echo "Config is provided as a file: $CONFIG" + CONFIGS=`cat "$CONFIG"` +fi + # Parse list of configs. -for MODEL_CONFIG in "$CONFIG"/* +for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") do LOCAL_SUCCESS=0 diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9c099a5aea..507b588ed6 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -21,8 +21,9 @@ "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm python environment +# vllm environment: image url, same (default), or the path of vllm virtualenv VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") +RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" EXPECTED_SAVED_FILES = [ @@ -31,7 +32,11 @@ "recipe.yaml", "tokenizer.json", ] - +IS_VLLM_IMAGE = false +# when using vllm image, needs to save the generated model and vllm command +if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exist()): + IS_VLLM_IMAGE = true + assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" 
# Will run each test case in its own process through run_tests.sh # emulating vLLM CI testing @@ -76,18 +81,29 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - # vllm python env - if same, use the current python env, otherwise use - # the python passed in VLLM_PYTHON_ENV - if VLLM_PYTHON_ENV.lower() != "same": - self.vllm_env = VLLM_PYTHON_ENV - else: + self.is_vllm_image = IS_VLLM_IMAGE + if VLLM_PYTHON_ENV.lower() == "same": self.vllm_env = sys.executable + else: + self.vllm_env = VLLM_PYTHON_ENV + + if RUN_SAVE_DIR != "none": + assert sd_path.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + self.run_save_dir = RUN_SAVE_DIR if not self.save_dir: - self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" + if RUN_SAVE_DIR != "none": + self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") + else: + self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" logger.info("========== RUNNING ==============") - logger.info(self.save_dir) + logger.info(f"model save dir: {self.save_dir}") + + # command file to run vllm if using vllm image + if self.is_vllm_image: + self.vllm_cmd_file = os.path.join(RUN_SAVE_DIR, "vllm.cmd") + logger.info(f"vllm cmd file save dir: {self.vllm_cmd_file}") self.prompts = [ "The capital of France is", @@ -100,8 +116,9 @@ def test_vllm(self, test_data_file: str): # Run vLLM with saved model self.set_up(test_data_file) - if not self.save_dir: - self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" + # not need this anymore? + #if not self.save_dir: + # self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" oneshot_model, tokenizer = run_oneshot_for_e2e_testing( model=self.model, model_class=self.model_class, @@ -151,10 +168,13 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) - if VLLM_PYTHON_ENV.lower() == "same": - logger.info("========== RUNNING vLLM in the same python env ==========") + if self.is_vllm_image: + logger.info("========== To run vLLM with vllm image ==========") else: - logger.info("========== RUNNING vLLM in a separate python env ==========") + if VLLM_PYTHON_ENV.lower() == "same": + logger.info("========== RUNNING vLLM in the same python env ==========") + else: + logger.info("========== RUNNING vLLM in a separate python env ==========") self._run_vllm(logger) @@ -200,20 +220,31 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) run_file_path = os.path.join(test_file_dir, "run_vllm.py") - logger.info("Run vllm in subprocess.Popen() using python env:") + logger.info("Run vllm using python env:") logger.info(self.vllm_env) - result = subprocess.Popen( - [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - stdout, stderr = result.communicate() - logger.info(stdout) + if self.is_vllm_image: + vllm_cmd = " ".join( + "python", run_file_path, f"'{json_scheme}'", + f"'{json_llm_kwargs}'", f"'{json_prompts}'") + with open(self.vllm_cmd_file, "a") as cf: + cf.write(vllm_cmd) + logger.info(f"Wrote vllm cmd into {vllm_cmd_file}:") + logger.info(vllm_cmd) + else: + logger.info("Run vllm in subprocess.Popen using python env:") + logger.info(self.vllm_env) + result = subprocess.Popen( + [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, 
json_prompts], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + stdout, stderr = result.communicate() + logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg def _check_session_contains_recipe(self) -> None: session = active_session() From afbf81197206f8c96e16c54150581be79ccac36f Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 10:29:20 -0400 Subject: [PATCH 02/41] fix a typo Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 507b588ed6..b1c1ab065b 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -32,10 +32,10 @@ "recipe.yaml", "tokenizer.json", ] -IS_VLLM_IMAGE = false +IS_VLLM_IMAGE = False # when using vllm image, needs to save the generated model and vllm command if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exist()): - IS_VLLM_IMAGE = true + IS_VLLM_IMAGE = True assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" # Will run each test case in its own process through run_tests.sh From a55e5c84d29b49b05b51fdcf6f75cafcc5fe1042 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:01:05 -0400 Subject: [PATCH 03/41] fix typo again Signed-off-by: Dan Huang --- tests/e2e/vLLM/run_tests.sh | 1 + tests/e2e/vLLM/test_vllm.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests.sh index 988319da55..1f94cf06a3 100644 --- a/tests/e2e/vLLM/run_tests.sh +++ b/tests/e2e/vLLM/run_tests.sh @@ -24,6 +24,7 @@ elif [ -f "$CONFIG" ]; then echo "Config is provided as a file: $CONFIG" CONFIGS=`cat "$CONFIG"` fi +echo "$CONFIGS" # Parse list of configs. for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index b1c1ab065b..6dcf77b426 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -34,7 +34,7 @@ ] IS_VLLM_IMAGE = False # when using vllm image, needs to save the generated model and vllm command -if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exist()): +if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" 
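
The first three patches introduce a single knob, VLLM_PYTHON_ENV, with three interpretations (current interpreter, separate virtualenv, or vLLM image). As a reference point before the later patches reshape this logic, here is a minimal standalone sketch of the detection as it stands after PATCH 03, assuming only the VLLM_PYTHON_ENV and RUN_SAVE_DIR environment variables introduced above; the helper name resolve_vllm_env is illustrative and not part of any patch:

    # Standalone sketch of the environment-detection logic built up in patches 01-03.
    import os
    import sys
    from pathlib import Path

    def resolve_vllm_env():
        """Return (vllm_env, is_image) for the requested vLLM environment.

        "same"            -> run vLLM with the current interpreter
        an existing path  -> run vLLM with that virtualenv's python
        anything else     -> treat the value as a vLLM image; RUN_SAVE_DIR must be
                             set so the quantized model and the generated vLLM
                             command can be shared with the container.
        """
        vllm_python_env = os.environ.get("VLLM_PYTHON_ENV", "same")
        run_save_dir = os.environ.get("RUN_SAVE_DIR", "none")

        if vllm_python_env.lower() == "same":
            return sys.executable, False
        if Path(vllm_python_env).exists():
            return vllm_python_env, False

        # Not "same" and not an existing path: assume it names a vLLM image.
        assert run_save_dir != "none", "To use vllm image, RUN_SAVE_DIR must be set!"
        return vllm_python_env, True

Later patches in the series refine the image branch further: PATCH 10 splits it into a pulled quay.io image versus an already-deployed runner, and PATCH 30/32 collapse that back to a single kubectl-driven path.
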
From ceee6817c8d390a5342f56bf6e678503ce0cf4fa Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:04:24 -0400 Subject: [PATCH 04/41] fix an issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 6dcf77b426..b41ae7b6c7 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -88,7 +88,7 @@ def set_up(self, test_data_file: str): self.vllm_env = VLLM_PYTHON_ENV if RUN_SAVE_DIR != "none": - assert sd_path.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + assert RUN_SAVE_DIR.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR if not self.save_dir: From bcc7a507b646b54a53c6b45e89bf7f130b161389 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:05:52 -0400 Subject: [PATCH 05/41] fix an issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index b41ae7b6c7..020438f180 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -88,7 +88,7 @@ def set_up(self, test_data_file: str): self.vllm_env = VLLM_PYTHON_ENV if RUN_SAVE_DIR != "none": - assert RUN_SAVE_DIR.exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR if not self.save_dir: From 665cd1eb64db76b1308e8b15b50a8472f346e020 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:21:14 -0400 Subject: [PATCH 06/41] fix cmd string Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 020438f180..6c57863cd9 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -224,9 +224,9 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if self.is_vllm_image: - vllm_cmd = " ".join( - "python", run_file_path, f"'{json_scheme}'", - f"'{json_llm_kwargs}'", f"'{json_prompts}'") + cmds = ["python", run_file_path, f"'{json_scheme}'", + f"'{json_llm_kwargs}'", f"'{json_prompts}'"] + vllm_cmd = " ".join(cmds) with open(self.vllm_cmd_file, "a") as cf: cf.write(vllm_cmd) logger.info(f"Wrote vllm cmd into {vllm_cmd_file}:") From 4bf0dc1552c6eccd7146dbbc75d010428d5894b1 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 11:27:00 -0400 Subject: [PATCH 07/41] fix an issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 6c57863cd9..9a761c2be5 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -220,7 +220,7 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) run_file_path = os.path.join(test_file_dir, "run_vllm.py") - logger.info("Run vllm using python env:") + logger.info("Run vllm using env:") logger.info(self.vllm_env) if self.is_vllm_image: @@ -229,7 +229,7 @@ def _run_vllm(self, logger): vllm_cmd = " ".join(cmds) with open(self.vllm_cmd_file, "a") as cf: cf.write(vllm_cmd) - logger.info(f"Wrote vllm cmd into {vllm_cmd_file}:") + logger.info(f"Wrote vllm cmd into {self.vllm_cmd_file}:") logger.info(vllm_cmd) else: logger.info("Run vllm in subprocess.Popen 
using python env:") From 59cea151e5ced5b98bd93f5509f08e578dacb242 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 31 Oct 2025 16:56:53 -0400 Subject: [PATCH 08/41] add debugging Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9a761c2be5..c570bac384 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -168,6 +168,7 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) + logger.info(f"Before vllm starts, here is self.save_dir: {self.save_dir}") if self.is_vllm_image: logger.info("========== To run vLLM with vllm image ==========") else: @@ -228,7 +229,7 @@ def _run_vllm(self, logger): f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) with open(self.vllm_cmd_file, "a") as cf: - cf.write(vllm_cmd) + cf.write(vllm_cmd + "\n") logger.info(f"Wrote vllm cmd into {self.vllm_cmd_file}:") logger.info(vllm_cmd) else: From be75c8d978fb4bd2a5d99d54fc43d471fe85466a Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Mon, 3 Nov 2025 14:21:10 -0500 Subject: [PATCH 09/41] don't delete run folder if using image Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index c570bac384..7796994c5f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -182,7 +182,7 @@ def test_vllm(self, test_data_file: str): self.tear_down() def tear_down(self): - if self.save_dir is not None and os.path.isdir(self.save_dir): + if not IS_VLLM_IMAGE and self.save_dir is not None and os.path.isdir(self.save_dir): shutil.rmtree(self.save_dir) timer = get_singleton_manager() From 586dcc18357a75f819cc7cbd53e73d91e90f714c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 14:39:50 -0500 Subject: [PATCH 10/41] allow using pulled image or deployed runner Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 66 ++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 7796994c5f..7cc6f37ce0 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -15,15 +15,30 @@ from tests.test_timer.timer_utils import get_singleton_manager, log_time from tests.testing_utils import requires_gpu + +def is_quay_image(url: str) -> bool: + pattern = r"^quay\.io/[a-z0-9][a-z0-9-_]*/[a-z0-9][a-z0-9-_/]*:[\w][\w.-]*$" + return re.match(pattern, url) is not None + HF_MODEL_HUB_NAME = "nm-testing" TEST_DATA_FILE = os.environ.get( "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: image url, same (default), or the path of vllm virtualenv +# vllm environment: image url, deployed runner name, same (default), or the path of vllm virtualenv VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") +IS_VLLM_IMAGE = False +IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") +VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") +# when using vllm image, needs to save the generated model and vllm command +if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): + IS_VLLM_IMAGE = True + if not is_quay_image(VLLM_PYTHON_ENV): + IS_VLLM_IMAGE_DEPLOYED = True + assert RUN_SAVE_DIR != "none", "To use vllm 
image must set RUN_SAVE_DIR too!" + TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" EXPECTED_SAVED_FILES = [ @@ -32,11 +47,6 @@ "recipe.yaml", "tokenizer.json", ] -IS_VLLM_IMAGE = False -# when using vllm image, needs to save the generated model and vllm command -if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): - IS_VLLM_IMAGE = True - assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" # Will run each test case in its own process through run_tests.sh # emulating vLLM CI testing @@ -61,6 +71,7 @@ class TestvLLM: be used for quantization. Otherwise, the recipe will always be used if given. """ # noqa: E501 + def set_up(self, test_data_file: str): eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8")) @@ -81,7 +92,7 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - self.is_vllm_image = IS_VLLM_IMAGE + #self.is_vllm_image = IS_VLLM_IMAGE if VLLM_PYTHON_ENV.lower() == "same": self.vllm_env = sys.executable else: @@ -90,20 +101,19 @@ def set_up(self, test_data_file: str): if RUN_SAVE_DIR != "none": assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR + # RUN_SAVE_DIR overwrites config save_dir + self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") if not self.save_dir: - if RUN_SAVE_DIR != "none": - self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") - else: - self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" + self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" logger.info("========== RUNNING ==============") logger.info(f"model save dir: {self.save_dir}") - # command file to run vllm if using vllm image - if self.is_vllm_image: - self.vllm_cmd_file = os.path.join(RUN_SAVE_DIR, "vllm.cmd") - logger.info(f"vllm cmd file save dir: {self.vllm_cmd_file}") + # script to run vllm if using vllm image + if IS_VLLM_IMAGE: + self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") + logger.info(f"vllm bash save dir: {self.vllm_bash}") self.prompts = [ "The capital of France is", @@ -151,7 +161,8 @@ def test_vllm(self, test_data_file: str): fp.write(recipe_yaml_str) session.reset() - if SKIP_HF_UPLOAD.lower() != "yes": + # if vllm image is used, don't upload + if SKIP_HF_UPLOAD.lower() != "yes" and not IS_VLLM_IMAGE: logger.info("================= UPLOADING TO HUB ======================") stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e" @@ -168,8 +179,7 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) - logger.info(f"Before vllm starts, here is self.save_dir: {self.save_dir}") - if self.is_vllm_image: + if IS_VLLM_IMAGE: logger.info("========== To run vLLM with vllm image ==========") else: if VLLM_PYTHON_ENV.lower() == "same": @@ -182,6 +192,7 @@ def test_vllm(self, test_data_file: str): self.tear_down() def tear_down(self): + # model save_dir is needed for vllm image testing if not IS_VLLM_IMAGE and self.save_dir is not None and os.path.isdir(self.save_dir): shutil.rmtree(self.save_dir) @@ -209,7 +220,10 @@ def _run_vllm(self, logger): import json import subprocess - llm_kwargs = {"model": self.save_dir} + llm_kwargs = {"model": self.save_dir(} + if IS_VLLM_IMAGE: + 
llm_kwargs = {"model": + self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR))} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization @@ -219,20 +233,26 @@ def _run_vllm(self, logger): json_prompts = json.dumps(self.prompts) test_file_dir = os.path.dirname(os.path.abspath(__file__)) - run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm using env:") logger.info(self.vllm_env) - if self.is_vllm_image: + if IS_VLLM_IMAGE: + run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) - with open(self.vllm_cmd_file, "a") as cf: + with open(self.vllm_bash, "w") as cf: + cf.write("#!/bin/bash\n\n") cf.write(vllm_cmd + "\n") - logger.info(f"Wrote vllm cmd into {self.vllm_cmd_file}:") + logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) + if IS_VLLM_IMAGE_DEPLOYED: + logger.info("vllm image is deployed. Run vllm cmd with kubectl.") + else: + logger.info("use vllm image directly. Run vllm cmd with podman.") else: + run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") logger.info(self.vllm_env) result = subprocess.Popen( From c1dde7f2f65b650d80c55b613501c48442377ca4 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 15:05:30 -0500 Subject: [PATCH 11/41] fix a typo Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 7cc6f37ce0..62523e73d4 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -220,7 +220,7 @@ def _run_vllm(self, logger): import json import subprocess - llm_kwargs = {"model": self.save_dir(} + llm_kwargs = {"model": self.save_dir} if IS_VLLM_IMAGE: llm_kwargs = {"model": self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR))} From ae9e526791a036540c3e2337c13de3ee43d94fc5 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 15:06:41 -0500 Subject: [PATCH 12/41] remove extra ) Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 62523e73d4..e4330c6eaa 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -223,7 +223,7 @@ def _run_vllm(self, logger): llm_kwargs = {"model": self.save_dir} if IS_VLLM_IMAGE: llm_kwargs = {"model": - self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR))} + self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization From 80352db9526aa37918419a2328460b44b3628cf4 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:27:48 -0500 Subject: [PATCH 13/41] run vllm with podman Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e4330c6eaa..456a03fa6f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -250,7 +250,21 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. Run vllm cmd with kubectl.") else: - logger.info("use vllm image directly. 
Run vllm cmd with podman.") + logger.info("Run vllm in subprocess.Popen with podman using vllm:") + logger.info(self.vllm_env) + result = subprocess.Popen( + ["podman", "run --rm -it --device nvidia.com/gpu=0", + "--security-opt=label=disable --userns=keep-id:uid=1001", + "--env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash", + "-v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + stdout, stderr = result.communicate() + logger.info(stdout) + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") From 8461d03fab753991dec104bad67716f1c0dffd6c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:30:49 -0500 Subject: [PATCH 14/41] fix error Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 456a03fa6f..ef010dd13a 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -253,10 +253,10 @@ def _run_vllm(self, logger): logger.info("Run vllm in subprocess.Popen with podman using vllm:") logger.info(self.vllm_env) result = subprocess.Popen( - ["podman", "run --rm -it --device nvidia.com/gpu=0", - "--security-opt=label=disable --userns=keep-id:uid=1001", - "--env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash", - "-v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], + ["podman", "run --rm -it --device nvidia.com/gpu=0 + --security-opt=label=disable --userns=keep-id:uid=1001 + --env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash + -v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, From 5704e62ca7246872972184fd4488ff8286d6f249 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:37:12 -0500 Subject: [PATCH 15/41] fix issues Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index ef010dd13a..16097f9f9e 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -242,8 +242,9 @@ def _run_vllm(self, logger): cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) - with open(self.vllm_bash, "w") as cf: + with open(self.vllm_bash, "a") as cf: cf.write("#!/bin/bash\n\n") + cf.write("export VLLM_NO_USAGE_STATS=1\n\n") cf.write(vllm_cmd + "\n") logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) @@ -253,18 +254,22 @@ def _run_vllm(self, logger): logger.info("Run vllm in subprocess.Popen with podman using vllm:") logger.info(self.vllm_env) result = subprocess.Popen( - ["podman", "run --rm -it --device nvidia.com/gpu=0 - --security-opt=label=disable --userns=keep-id:uid=1001 - --env=VLLM_NO_USAGE_STATS=1 --entrypoint=self.vllm_bash - -v RUN_SAVE_DIR:VLLM_VOLUME_MOUNT_DIR ${VLLM_PYTHON_ENV}"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - stdout, stderr = result.communicate() - logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg + [ + "podman", "run", "--rm", + "--device", 
"nvidia.com/gpu=0", + "--security-opt=label=disable", + "--userns=keep-id:uid=1001", + "--entrypoint", self.vllm_bash, + "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + VLLM_PYTHON_ENV, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) + stdout, stderr = result.communicate() + logger.info(stdout) + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") From 098f561e5b4fc53631b5ce25c687c6873ccaeb16 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:40:01 -0500 Subject: [PATCH 16/41] fix path Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 16097f9f9e..d28540f282 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -259,7 +259,8 @@ def _run_vllm(self, logger): "--device", "nvidia.com/gpu=0", "--security-opt=label=disable", "--userns=keep-id:uid=1001", - "--entrypoint", self.vllm_bash, + "--entrypoint", + self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", VLLM_PYTHON_ENV, ], From d56440850efb0432d7b0144f19cc206fdcea5f41 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:53:11 -0500 Subject: [PATCH 17/41] improve output Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index d28540f282..fbf194644f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -242,21 +242,30 @@ def _run_vllm(self, logger): cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) - with open(self.vllm_bash, "a") as cf: - cf.write("#!/bin/bash\n\n") - cf.write("export VLLM_NO_USAGE_STATS=1\n\n") - cf.write(vllm_cmd + "\n") + with open(self.vllm_bash, "w") as cf: + cf.write(f"#!/bin/bash\n\n + export VLLM_NO_USAGE_STATS=1\n\n") + {vllm_cmd}\n") logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. 
Run vllm cmd with kubectl.") else: - logger.info("Run vllm in subprocess.Popen with podman using vllm:") - logger.info(self.vllm_env) + podman_cmd = " ".join("podman", + "run", "--rm", "--device", + "nvidia.com/gpu=all", + "--security-opt=label=disable", + "--userns=keep-id:uid=1001", + "--entrypoint", + self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), + "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + VLLM_PYTHON_ENV) + logger.info("podman command:") + logger.info(podman_cmd) result = subprocess.Popen( [ "podman", "run", "--rm", - "--device", "nvidia.com/gpu=0", + "--device", "nvidia.com/gpu=all", "--security-opt=label=disable", "--userns=keep-id:uid=1001", "--entrypoint", @@ -267,10 +276,6 @@ def _run_vllm(self, logger): stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - stdout, stderr = result.communicate() - logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") @@ -281,11 +286,12 @@ def _run_vllm(self, logger): stderr=subprocess.PIPE, text=True, ) - stdout, stderr = result.communicate() - logger.info(stdout) - error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" - assert result.returncode == 0, error_msg + stdout, stderr = result.communicate() + logger.info(stdout) + + error_msg = f"ERROR: vLLM failed with exit code {result.returncode}: {stderr}" + assert result.returncode == 0, error_msg def _check_session_contains_recipe(self) -> None: session = active_session() From 5da7eee819d1bc8753c82f27da67ba436da437a1 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 16:54:45 -0500 Subject: [PATCH 18/41] fix typo Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fbf194644f..1c0ad550f6 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -244,7 +244,7 @@ def _run_vllm(self, logger): vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: cf.write(f"#!/bin/bash\n\n - export VLLM_NO_USAGE_STATS=1\n\n") + export VLLM_NO_USAGE_STATS=1\n\n {vllm_cmd}\n") logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) From 4cb22517ee35e5031e7b265c3e620a39d9539c58 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:02:18 -0500 Subject: [PATCH 19/41] fix format Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 1c0ad550f6..4a73b1141a 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -243,9 +243,10 @@ def _run_vllm(self, logger): f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: - cf.write(f"#!/bin/bash\n\n - export VLLM_NO_USAGE_STATS=1\n\n - {vllm_cmd}\n") + cf.write(f"""#!/bin/bash + export VLLM_NO_USAGE_STATS=1 + {vllm_cmd} + """) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: From d2cb6464fcbb46652b07bceb8dd262248a12a379 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:08:19 -0500 Subject: [PATCH 20/41] fix command Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff 
--git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 4a73b1141a..0fd8075b1b 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -252,15 +252,14 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. Run vllm cmd with kubectl.") else: - podman_cmd = " ".join("podman", - "run", "--rm", "--device", - "nvidia.com/gpu=all", + cmds = ["podman run --rm --device nvidia.com/gpu=all", "--security-opt=label=disable", "--userns=keep-id:uid=1001", "--entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", - VLLM_PYTHON_ENV) + VLLM_PYTHON_ENV] + podman_cmd = " ".join(cmds) logger.info("podman command:") logger.info(podman_cmd) result = subprocess.Popen( From 5cdb54306229ae9b77dfc11b6ee902a5bb7efe9c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:11:26 -0500 Subject: [PATCH 21/41] allow file to execute Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 0fd8075b1b..5911584919 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -247,6 +247,7 @@ def _run_vllm(self, logger): export VLLM_NO_USAGE_STATS=1 {vllm_cmd} """) + os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: From 6dc42c41962820f0529b2616c75c8ecad6c3c0f6 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:14:01 -0500 Subject: [PATCH 22/41] minor update Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 5911584919..d4607b89d6 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -244,6 +244,7 @@ def _run_vllm(self, logger): vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: cf.write(f"""#!/bin/bash + export HF_HUB_OFFLINE=0 export VLLM_NO_USAGE_STATS=1 {vllm_cmd} """) From 84634e0de77ee1e7db693c9f82cbfd97087bc235 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:23:40 -0500 Subject: [PATCH 23/41] copy file Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index d4607b89d6..c891c53841 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -239,6 +239,7 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") + shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), run_file_path) cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) @@ -268,8 +269,6 @@ def _run_vllm(self, logger): [ "podman", "run", "--rm", "--device", "nvidia.com/gpu=all", - "--security-opt=label=disable", - "--userns=keep-id:uid=1001", "--entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", From 57c99acf17e4e81d357e93d8edb74633944c266e Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Wed, 5 Nov 2025 17:26:40 -0500 Subject: [PATCH 24/41] fix issue Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index c891c53841..b7fc6c2681 
100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -239,7 +239,8 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") - shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), run_file_path) + shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), + os.path.join(RUN_SAVE_DIR, "run_vllm.py")) cmds = ["python", run_file_path, f"'{json_scheme}'", f"'{json_llm_kwargs}'", f"'{json_prompts}'"] vllm_cmd = " ".join(cmds) From 7cdedbbd5c2b842560c76d0e8f8ed03c511544a1 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 7 Nov 2025 13:57:54 -0500 Subject: [PATCH 25/41] run vllm in deployed pod Signed-off-by: Dan Huang --- tests/e2e/vLLM/e2e-smoke.list | 5 ----- tests/e2e/vLLM/lmeval-smoke.list | 6 ++++++ tests/e2e/vLLM/test_vllm.py | 24 ++++++++++++++++-------- 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 tests/e2e/vLLM/lmeval-smoke.list diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/e2e-smoke.list index e7f42d4a1e..9737d8d37a 100644 --- a/tests/e2e/vLLM/e2e-smoke.list +++ b/tests/e2e/vLLM/e2e-smoke.list @@ -1,6 +1 @@ fp8_dynamic_per_token.yaml -kv_cache_gptq_tinyllama.yaml -sparse2of4_fp8_dynamic.yaml -w4a16_grouped_quant_asym_awq.yaml -w4a16_actorder_weight.yaml -int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/lmeval-smoke.list b/tests/e2e/vLLM/lmeval-smoke.list new file mode 100644 index 0000000000..e7f42d4a1e --- /dev/null +++ b/tests/e2e/vLLM/lmeval-smoke.list @@ -0,0 +1,6 @@ +fp8_dynamic_per_token.yaml +kv_cache_gptq_tinyllama.yaml +sparse2of4_fp8_dynamic.yaml +w4a16_grouped_quant_asym_awq.yaml +w4a16_actorder_weight.yaml +int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index b7fc6c2681..231e9120d0 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -255,22 +255,30 @@ def _run_vllm(self, logger): logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. 
Run vllm cmd with kubectl.") + cmds = [f"kubectl exec -it VLLM_PYTHON_ENV -n arc-runners", + f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"] + kubectl_cmd = " ".join(cmds) + logger.info(f"kubectl command: {kubectl_cmd}") + result = subprocess.Popen( + [ + "kubectl", "exec", "-it", + VLLM_PYTHON_ENV, "-n arc-runners", + "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash" + ] + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) else: - cmds = ["podman run --rm --device nvidia.com/gpu=all", - "--security-opt=label=disable", - "--userns=keep-id:uid=1001", - "--entrypoint", + cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", VLLM_PYTHON_ENV] podman_cmd = " ".join(cmds) - logger.info("podman command:") - logger.info(podman_cmd) + logger.info(f"podman command: {podman_cmd}") result = subprocess.Popen( [ "podman", "run", "--rm", - "--device", "nvidia.com/gpu=all", - "--entrypoint", + "--device", "nvidia.com/gpu=all", "--entrypoint", self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", VLLM_PYTHON_ENV, From 3951475283aa80e946dc18c56bef82bbc4f6e97c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 7 Nov 2025 15:15:32 -0500 Subject: [PATCH 26/41] missed , Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 231e9120d0..e774913ffb 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -255,7 +255,7 @@ def _run_vllm(self, logger): logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. 
Run vllm cmd with kubectl.") - cmds = [f"kubectl exec -it VLLM_PYTHON_ENV -n arc-runners", + cmds = [f"kubectl exec -it {VLLM_PYTHON_ENV} -n arc-runners", f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"] kubectl_cmd = " ".join(cmds) logger.info(f"kubectl command: {kubectl_cmd}") @@ -263,8 +263,8 @@ def _run_vllm(self, logger): [ "kubectl", "exec", "-it", VLLM_PYTHON_ENV, "-n arc-runners", - "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash" - ] + "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) From 5c401fcba15abf6adda43d19966bf2a2a8f2aff6 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 7 Nov 2025 16:06:35 -0500 Subject: [PATCH 27/41] fix command Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e774913ffb..fb3e406310 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -262,8 +262,8 @@ def _run_vllm(self, logger): result = subprocess.Popen( [ "kubectl", "exec", "-it", - VLLM_PYTHON_ENV, "-n arc-runners", - "-- /bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", + VLLM_PYTHON_ENV, "-n", "arc-runners", + "--", "/bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, From 870b6ee07810d890bbdbca20c619bf9b13de877d Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 11 Nov 2025 11:54:51 -0500 Subject: [PATCH 28/41] remove VLLM_VOLUME_MOUNT_DIR Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fb3e406310..fb22235b56 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -31,7 +31,7 @@ def is_quay_image(url: str) -> bool: IS_VLLM_IMAGE = False IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") -VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") +#VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") # when using vllm image, needs to save the generated model and vllm command if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True @@ -221,9 +221,9 @@ def _run_vllm(self, logger): import subprocess llm_kwargs = {"model": self.save_dir} - if IS_VLLM_IMAGE: - llm_kwargs = {"model": - self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} + #if IS_VLLM_IMAGE: + # llm_kwargs = {"model": + # self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization @@ -238,7 +238,7 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if IS_VLLM_IMAGE: - run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") + #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) cmds = ["python", run_file_path, f"'{json_scheme}'", @@ -270,8 +270,10 @@ def _run_vllm(self, logger): text=True) else: cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint", - self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), + self.vllm_bash, + "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + 
f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", VLLM_PYTHON_ENV] podman_cmd = " ".join(cmds) logger.info(f"podman command: {podman_cmd}") @@ -279,8 +281,10 @@ def _run_vllm(self, logger): [ "podman", "run", "--rm", "--device", "nvidia.com/gpu=all", "--entrypoint", - self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - "-v", f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), + self.vllm_bash, + "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", VLLM_PYTHON_ENV, ], stdout=subprocess.PIPE, From d23bdf4ba7d1cd734c02142456729a6ffa3b1ddd Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 11 Nov 2025 12:17:10 -0500 Subject: [PATCH 29/41] fix missing path Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fb22235b56..bacff76f20 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -239,6 +239,7 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") + run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) cmds = ["python", run_file_path, f"'{json_scheme}'", From 625c9db1bdfbb11143f4114a628302ab3f31bb74 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 11 Nov 2025 13:23:53 -0500 Subject: [PATCH 30/41] clean up Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index bacff76f20..fdcd8d4238 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -31,13 +31,12 @@ def is_quay_image(url: str) -> bool: IS_VLLM_IMAGE = False IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") -#VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs") -# when using vllm image, needs to save the generated model and vllm command +# when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True if not is_quay_image(VLLM_PYTHON_ENV): IS_VLLM_IMAGE_DEPLOYED = True - assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!" + assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" 
TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -92,7 +91,6 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - #self.is_vllm_image = IS_VLLM_IMAGE if VLLM_PYTHON_ENV.lower() == "same": self.vllm_env = sys.executable else: @@ -101,7 +99,7 @@ def set_up(self, test_data_file: str): if RUN_SAVE_DIR != "none": assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR - # RUN_SAVE_DIR overwrites config save_dir + # RUN_SAVE_DIR overwrites config save_dir if specified self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") if not self.save_dir: @@ -112,6 +110,7 @@ def set_up(self, test_data_file: str): # script to run vllm if using vllm image if IS_VLLM_IMAGE: + # script file containing vllm commands to run in the image self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") logger.info(f"vllm bash save dir: {self.vllm_bash}") @@ -126,9 +125,6 @@ def test_vllm(self, test_data_file: str): # Run vLLM with saved model self.set_up(test_data_file) - # not need this anymore? - #if not self.save_dir: - # self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" oneshot_model, tokenizer = run_oneshot_for_e2e_testing( model=self.model, model_class=self.model_class, @@ -221,9 +217,6 @@ def _run_vllm(self, logger): import subprocess llm_kwargs = {"model": self.save_dir} - #if IS_VLLM_IMAGE: - # llm_kwargs = {"model": - # self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)} if self.gpu_memory_utilization is not None: llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization @@ -238,7 +231,6 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if IS_VLLM_IMAGE: - #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py") run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) @@ -253,38 +245,24 @@ def _run_vllm(self, logger): """) os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") - logger.info(vllm_cmd) if IS_VLLM_IMAGE_DEPLOYED: logger.info("vllm image is deployed. Run vllm cmd with kubectl.") - cmds = [f"kubectl exec -it {VLLM_PYTHON_ENV} -n arc-runners", - f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"] - kubectl_cmd = " ".join(cmds) - logger.info(f"kubectl command: {kubectl_cmd}") result = subprocess.Popen( [ "kubectl", "exec", "-it", VLLM_PYTHON_ENV, "-n", "arc-runners", - "--", "/bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash", + "--", "/bin/bash", self.vllm_bash, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) else: - cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint", - #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - self.vllm_bash, - "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", - f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", - VLLM_PYTHON_ENV] - podman_cmd = " ".join(cmds) - logger.info(f"podman command: {podman_cmd}") + logger.info("vllm image is pulled. 
Run vllm cmd with podman.") result = subprocess.Popen( [ "podman", "run", "--rm", "--device", "nvidia.com/gpu=all", "--entrypoint", - #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR), - self.vllm_bash, - "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}", + self.vllm_bash, "-v", f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", VLLM_PYTHON_ENV, ], From 264fdcb206faa5ddfa898570ab99120799220829 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Thu, 13 Nov 2025 14:05:02 -0500 Subject: [PATCH 31/41] final update Signed-off-by: Dan Huang --- tests/e2e/vLLM/e2e-smoke.list | 6 ++++++ tests/e2e/vLLM/lmeval-smoke.list | 6 ------ tests/e2e/vLLM/test_vllm.py | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) delete mode 100644 tests/e2e/vLLM/lmeval-smoke.list diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/e2e-smoke.list index 9737d8d37a..d66618aaec 100644 --- a/tests/e2e/vLLM/e2e-smoke.list +++ b/tests/e2e/vLLM/e2e-smoke.list @@ -1 +1,7 @@ +fp4_nvfp4.yaml fp8_dynamic_per_token.yaml +kv_cache_gptq_tinyllama.yaml +sparse2of4_fp8_dynamic.yaml +w4a16_grouped_quant_asym_awq.yaml +w4a16_actorder_weight.yaml +int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/lmeval-smoke.list b/tests/e2e/vLLM/lmeval-smoke.list deleted file mode 100644 index e7f42d4a1e..0000000000 --- a/tests/e2e/vLLM/lmeval-smoke.list +++ /dev/null @@ -1,6 +0,0 @@ -fp8_dynamic_per_token.yaml -kv_cache_gptq_tinyllama.yaml -sparse2of4_fp8_dynamic.yaml -w4a16_grouped_quant_asym_awq.yaml -w4a16_actorder_weight.yaml -int8_channel_weight_static_per_tensor_act.yaml diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index fdcd8d4238..9ca1c77ef3 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -26,7 +26,7 @@ def is_quay_image(url: str) -> bool: "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: image url, deployed runner name, same (default), or the path of vllm virtualenv +# vllm environment: same (default), the path of vllm virtualenv, image url, deployed runner name VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False IS_VLLM_IMAGE_DEPLOYED=False @@ -231,6 +231,7 @@ def _run_vllm(self, logger): logger.info(self.vllm_env) if IS_VLLM_IMAGE: + # generate python command to run in the vllm image run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), os.path.join(RUN_SAVE_DIR, "run_vllm.py")) @@ -257,7 +258,7 @@ def _run_vllm(self, logger): stderr=subprocess.PIPE, text=True) else: - logger.info("vllm image is pulled. Run vllm cmd with podman.") + logger.info("vllm image is pulled locally. 
Run vllm cmd with podman.") result = subprocess.Popen( [ "podman", "run", "--rm", From 318bd3da6629802c06e66184d317939340728727 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Thu, 13 Nov 2025 17:37:47 -0500 Subject: [PATCH 32/41] clean up Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 45 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9ca1c77ef3..501fe4510f 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -16,27 +16,20 @@ from tests.testing_utils import requires_gpu -def is_quay_image(url: str) -> bool: - pattern = r"^quay\.io/[a-z0-9][a-z0-9-_]*/[a-z0-9][a-z0-9-_/]*:[\w][\w.-]*$" - return re.match(pattern, url) is not None - HF_MODEL_HUB_NAME = "nm-testing" TEST_DATA_FILE = os.environ.get( "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: same (default), the path of vllm virtualenv, image url, deployed runner name +# vllm environment: same (default), the path of vllm virtualenv, deployed runner name VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False -IS_VLLM_IMAGE_DEPLOYED=False RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") # when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True - if not is_quay_image(VLLM_PYTHON_ENV): - IS_VLLM_IMAGE_DEPLOYED = True - assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" + assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -246,30 +239,16 @@ def _run_vllm(self, logger): """) os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") - if IS_VLLM_IMAGE_DEPLOYED: - logger.info("vllm image is deployed. Run vllm cmd with kubectl.") - result = subprocess.Popen( - [ - "kubectl", "exec", "-it", - VLLM_PYTHON_ENV, "-n", "arc-runners", - "--", "/bin/bash", self.vllm_bash, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) - else: - logger.info("vllm image is pulled locally. Run vllm cmd with podman.") - result = subprocess.Popen( - [ - "podman", "run", "--rm", - "--device", "nvidia.com/gpu=all", "--entrypoint", - self.vllm_bash, "-v", - f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", - VLLM_PYTHON_ENV, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + logger.info("vllm image. 
Run vllm cmd with kubectl.") + result = subprocess.Popen( + [ + "kubectl", "exec", "-it", + VLLM_PYTHON_ENV, "-n", "arc-runners", + "--", "/bin/bash", self.vllm_bash, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") From 117ec9d9da2183d8949350debf6cd106ec379577 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Fri, 14 Nov 2025 15:54:23 -0500 Subject: [PATCH 33/41] fix quality failures Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 67 +++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 501fe4510f..1a93c5362e 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -15,7 +15,6 @@ from tests.test_timer.timer_utils import get_singleton_manager, log_time from tests.testing_utils import requires_gpu - HF_MODEL_HUB_NAME = "nm-testing" TEST_DATA_FILE = os.environ.get( @@ -25,7 +24,7 @@ # vllm environment: same (default), the path of vllm virtualenv, deployed runner name VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False -RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") +RUN_SAVE_DIR = os.environ.get("RUN_SAVE_DIR", "none") # when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True @@ -40,6 +39,7 @@ "tokenizer.json", ] + # Will run each test case in its own process through run_tests.sh # emulating vLLM CI testing @requires_gpu(1) @@ -63,7 +63,6 @@ class TestvLLM: be used for quantization. Otherwise, the recipe will always be used if given. """ # noqa: E501 - def set_up(self, test_data_file: str): eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8")) @@ -90,10 +89,14 @@ def set_up(self, test_data_file: str): self.vllm_env = VLLM_PYTHON_ENV if RUN_SAVE_DIR != "none": - assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" + assert Path( + RUN_SAVE_DIR + ).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" self.run_save_dir = RUN_SAVE_DIR # RUN_SAVE_DIR overwrites config save_dir if specified - self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}") + self.save_dir = os.path.join( + RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}" + ) if not self.save_dir: self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" @@ -174,7 +177,9 @@ def test_vllm(self, test_data_file: str): if VLLM_PYTHON_ENV.lower() == "same": logger.info("========== RUNNING vLLM in the same python env ==========") else: - logger.info("========== RUNNING vLLM in a separate python env ==========") + logger.info( + "========== RUNNING vLLM in a separate python env ==========" + ) self._run_vllm(logger) @@ -182,7 +187,11 @@ def test_vllm(self, test_data_file: str): def tear_down(self): # model save_dir is needed for vllm image testing - if not IS_VLLM_IMAGE and self.save_dir is not None and os.path.isdir(self.save_dir): + if ( + not IS_VLLM_IMAGE + and self.save_dir is not None + and os.path.isdir(self.save_dir) + ): shutil.rmtree(self.save_dir) timer = get_singleton_manager() @@ -226,35 +235,57 @@ def _run_vllm(self, logger): if IS_VLLM_IMAGE: # generate python command to run in the vllm image run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") - shutil.copy(os.path.join(test_file_dir, "run_vllm.py"), 
- os.path.join(RUN_SAVE_DIR, "run_vllm.py")) - cmds = ["python", run_file_path, f"'{json_scheme}'", - f"'{json_llm_kwargs}'", f"'{json_prompts}'"] + shutil.copy( + os.path.join(test_file_dir, "run_vllm.py"), + os.path.join(RUN_SAVE_DIR, "run_vllm.py"), + ) + cmds = [ + "python", + run_file_path, + f"'{json_scheme}'", + f"'{json_llm_kwargs}'", + f"'{json_prompts}'", + ] vllm_cmd = " ".join(cmds) with open(self.vllm_bash, "w") as cf: - cf.write(f"""#!/bin/bash + cf.write( + f"""#!/bin/bash export HF_HUB_OFFLINE=0 export VLLM_NO_USAGE_STATS=1 {vllm_cmd} - """) + """ + ) os.chmod(self.vllm_bash, 0o755) logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") logger.info("vllm image. Run vllm cmd with kubectl.") result = subprocess.Popen( [ - "kubectl", "exec", "-it", - VLLM_PYTHON_ENV, "-n", "arc-runners", - "--", "/bin/bash", self.vllm_bash, + "kubectl", + "exec", + "-it", + VLLM_PYTHON_ENV, + "-n", + "arc-runners", + "--", + "/bin/bash", + self.vllm_bash, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - text=True) + text=True, + ) else: run_file_path = os.path.join(test_file_dir, "run_vllm.py") logger.info("Run vllm in subprocess.Popen using python env:") logger.info(self.vllm_env) result = subprocess.Popen( - [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts], + [ + self.vllm_env, + run_file_path, + json_scheme, + json_llm_kwargs, + json_prompts, + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, From 8b41d5f0991aeaf7223d4460917aea3fbc01ec85 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Mon, 24 Nov 2025 16:59:06 -0500 Subject: [PATCH 34/41] reorg test code and remove env var Signed-off-by: Dan Huang --- tests/e2e/vLLM/run_tests.sh | 12 +--- tests/e2e/vLLM/run_tests_in_rhaiis.sh | 81 +++++++++++++++++++++++++++ tests/e2e/vLLM/test_vllm.py | 79 ++++++++++---------------- 3 files changed, 112 insertions(+), 60 deletions(-) create mode 100644 tests/e2e/vLLM/run_tests_in_rhaiis.sh diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests.sh index 1f94cf06a3..9a2fcec9cd 100644 --- a/tests/e2e/vLLM/run_tests.sh +++ b/tests/e2e/vLLM/run_tests.sh @@ -16,18 +16,8 @@ while getopts "c:t:" OPT; do esac done -script_path=$(dirname "${BASH_SOURCE[0]}") -if [ -d "$CONFIG" ]; then - echo "Config is provided as a folder: $CONFIG" - CONFIGS=`ls "$CONFIG"` -elif [ -f "$CONFIG" ]; then - echo "Config is provided as a file: $CONFIG" - CONFIGS=`cat "$CONFIG"` -fi -echo "$CONFIGS" - # Parse list of configs. -for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") +for MODEL_CONFIG in "$CONFIG"/* do LOCAL_SUCCESS=0 diff --git a/tests/e2e/vLLM/run_tests_in_rhaiis.sh b/tests/e2e/vLLM/run_tests_in_rhaiis.sh new file mode 100644 index 0000000000..2c1867f428 --- /dev/null +++ b/tests/e2e/vLLM/run_tests_in_rhaiis.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +usage() { + echo "Usage: $0 -c -t -s " + exit 1 +} + +while getopts "c:t:s:" OPT; do + case ${OPT} in + c ) + CONFIG="$OPTARG" + ;; + t ) + TEST="$OPTARG" + ;; + s ) + SAVE_DIR="$OPTARG" + ;; + \? ) + exit 1 + ;; + esac +done + +if [[ -z "$CONFIG" || -z "$TEST" || -z "$SAVE_DIR" ]]; then + echo "Error: -c, -t, and -s are required." 
+ usage +fi + +script_path=$(dirname "${BASH_SOURCE[0]}") +if [ -d "$CONFIG" ]; then + echo "Config is provided as a folder: $CONFIG" + CONFIGS=`ls "$CONFIG"` +elif [ -f "$CONFIG" ]; then + echo "Config is provided as a file: $CONFIG" + CONFIGS=`cat "$CONFIG"` +fi + +SUCCESS=0 + +# Parse list of configs and add save_dir +rm -rf $SAVE_DIR/configs +mkdir -p $SAVE_DIR/configs +for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|") +do + FILE_NAME=$(basename $MODEL_CONFIG) + CONFIG_FILE=$SAVE_DIR/configs/$FILE_NAME + + save_dir=$(cat $MODEL_CONFIG | grep 'save_dir:' | cut -d' ' -f2) + model=$(cat $MODEL_CONFIG | grep 'model:' | cut -d'/' -f2) + scheme=$(cat $MODEL_CONFIG | grep 'scheme:' | cut -d' ' -f2) + + # add or overwrite save_dir for each model + if [[ -z "$save_dir" ]]; then + { cat $MODEL_CONFIG; echo -e "\nsave_dir: $SAVE_DIR/$model-$scheme"; } > $CONFIG_FILE + else + { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE + fi + + #{ cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR"; } > $CONFIG_FILE + + echo "=== RUNNING MODEL: $CONFIG_FILE ===" + cat $CONFIG_FILE + + LOCAL_SUCCESS=0 + export TEST_DATA_FILE="$CONFIG_FILE" + pytest \ + --capture=tee-sys \ + "$TEST" || LOCAL_SUCCESS=$? + + if [[ $LOCAL_SUCCESS == 0 ]]; then + echo "=== PASSED MODEL: $CONFIG_FILE ===" + else + echo "=== FAILED MODEL: $CONFIG_FILE ===" + fi + + SUCCESS=$((SUCCESS + LOCAL_SUCCESS)) + +done + +exit "$SUCCESS" diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 1a93c5362e..e432a5f001 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -1,3 +1,4 @@ + import os import re import shutil @@ -21,15 +22,11 @@ "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" ) SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") -# vllm environment: same (default), the path of vllm virtualenv, deployed runner name +# vllm python environment VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") IS_VLLM_IMAGE = False -RUN_SAVE_DIR = os.environ.get("RUN_SAVE_DIR", "none") -# when using vllm image, needs to save the generated model if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): IS_VLLM_IMAGE = True - assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" 
- TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" EXPECTED_SAVED_FILES = [ @@ -83,32 +80,18 @@ def set_up(self, test_data_file: str): self.max_seq_length = eval_config.get("max_seq_length", 2048) # GPU memory utilization - only set if explicitly provided in config self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization") - if VLLM_PYTHON_ENV.lower() == "same": - self.vllm_env = sys.executable - else: + # vllm python env - if same, use the current python env, otherwise use + # the python passed in VLLM_PYTHON_ENV + if VLLM_PYTHON_ENV.lower() != "same": self.vllm_env = VLLM_PYTHON_ENV - - if RUN_SAVE_DIR != "none": - assert Path( - RUN_SAVE_DIR - ).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}" - self.run_save_dir = RUN_SAVE_DIR - # RUN_SAVE_DIR overwrites config save_dir if specified - self.save_dir = os.path.join( - RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}" - ) + else: + self.vllm_env = sys.executable if not self.save_dir: self.save_dir = self.model.split("/")[1] + f"-{self.scheme}" logger.info("========== RUNNING ==============") - logger.info(f"model save dir: {self.save_dir}") - - # script to run vllm if using vllm image - if IS_VLLM_IMAGE: - # script file containing vllm commands to run in the image - self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") - logger.info(f"vllm bash save dir: {self.vllm_bash}") + logger.info(self.save_dir) self.prompts = [ "The capital of France is", @@ -117,9 +100,7 @@ def set_up(self, test_data_file: str): ] self.api = HfApi() - def test_vllm(self, test_data_file: str): - # Run vLLM with saved model - + def compress_model(self, test_data_file: str): self.set_up(test_data_file) oneshot_model, tokenizer = run_oneshot_for_e2e_testing( model=self.model, @@ -133,12 +114,16 @@ def test_vllm(self, test_data_file: str): recipe=self.recipe, quant_type=self.quant_type, ) + self.oneshot_model = oneshot_model + self.tokenizer = tokenizer # check that session contains recipe self._check_session_contains_recipe() + def save_compressed_model(self): + logger.info("================= SAVING TO DISK ======================") - self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer) + self._save_compressed_model(oneshot_model=self.oneshot_model, tokenizer=self.tokenizer) recipe_path = os.path.join(self.save_dir, "recipe.yaml") @@ -153,8 +138,7 @@ def test_vllm(self, test_data_file: str): fp.write(recipe_yaml_str) session.reset() - # if vllm image is used, don't upload - if SKIP_HF_UPLOAD.lower() != "yes" and not IS_VLLM_IMAGE: + if SKIP_HF_UPLOAD.lower() != "yes": logger.info("================= UPLOADING TO HUB ======================") stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e" @@ -171,27 +155,22 @@ def test_vllm(self, test_data_file: str): folder_path=self.save_dir, ) + def test_vllm(self): + # Run vLLM with saved model + if IS_VLLM_IMAGE: - logger.info("========== To run vLLM with vllm image ==========") + logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========") + elif VLLM_PYTHON_ENV.lower() == "same": + logger.info("========== RUNNING vLLM in the same python env ==========") else: - if VLLM_PYTHON_ENV.lower() == "same": - logger.info("========== RUNNING vLLM in the same python env ==========") - else: - logger.info( - "========== RUNNING vLLM in a separate python env ==========" - ) + logger.info("========== RUNNING vLLM in a separate python env ==========") self._run_vllm(logger) self.tear_down() def 
tear_down(self): - # model save_dir is needed for vllm image testing - if ( - not IS_VLLM_IMAGE - and self.save_dir is not None - and os.path.isdir(self.save_dir) - ): + if self.save_dir is not None and os.path.isdir(self.save_dir): shutil.rmtree(self.save_dir) timer = get_singleton_manager() @@ -229,11 +208,12 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) - logger.info("Run vllm using env:") + logger.info("Run vllm in subprocess.Popen() using python env:") logger.info(self.vllm_env) if IS_VLLM_IMAGE: # generate python command to run in the vllm image + RUN_SAVE_DIR = os.path.dirname(self.save_dir) run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py") shutil.copy( os.path.join(test_file_dir, "run_vllm.py"), @@ -247,7 +227,8 @@ def _run_vllm(self, logger): f"'{json_prompts}'", ] vllm_cmd = " ".join(cmds) - with open(self.vllm_bash, "w") as cf: + vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash") + with open(vllm_bash, "w") as cf: cf.write( f"""#!/bin/bash export HF_HUB_OFFLINE=0 @@ -255,8 +236,8 @@ def _run_vllm(self, logger): {vllm_cmd} """ ) - os.chmod(self.vllm_bash, 0o755) - logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") + os.chmod(vllm_bash, 0o755) + logger.info(f"Wrote vllm cmd into {vllm_bash}:") logger.info("vllm image. Run vllm cmd with kubectl.") result = subprocess.Popen( [ @@ -268,7 +249,7 @@ def _run_vllm(self, logger): "arc-runners", "--", "/bin/bash", - self.vllm_bash, + vllm_bash, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, From 1b2530e9612ae458b0eb5907bd6df179acd66c94 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 09:42:22 -0500 Subject: [PATCH 35/41] fix error Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e432a5f001..9b8f6027cd 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -155,9 +155,15 @@ def save_compressed_model(self): folder_path=self.save_dir, ) - def test_vllm(self): - # Run vLLM with saved model + def test_vllm(self, test_data_file: str): + + self.set_up(self, test_data_file) + + self.compress_model(self, test_data_file) + self.save_compressed_model(self) + + # Run vLLM with saved model if IS_VLLM_IMAGE: logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========") elif VLLM_PYTHON_ENV.lower() == "same": From 3d889c6838c49d75fe2bbae7870d551e3c7f7c4c Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 10:42:05 -0500 Subject: [PATCH 36/41] fix another error Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9b8f6027cd..e13878ef89 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -157,11 +157,11 @@ def save_compressed_model(self): def test_vllm(self, test_data_file: str): - self.set_up(self, test_data_file) + self.set_up(test_data_file) - self.compress_model(self, test_data_file) + self.compress_model(test_data_file) - self.save_compressed_model(self) + self.save_compressed_model() # Run vLLM with saved model if IS_VLLM_IMAGE: From 7e772026143a3ee848fd1d022897954b970d77be Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 11:30:35 -0500 Subject: [PATCH 37/41] fix style Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index e13878ef89..5b963a641b 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -123,7 +123,8 @@ def compress_model(self, test_data_file: str): def save_compressed_model(self): logger.info("================= SAVING TO DISK ======================") - self._save_compressed_model(oneshot_model=self.oneshot_model, tokenizer=self.tokenizer) + self._save_compressed_model(oneshot_model=self.oneshot_model, + tokenizer=self.tokenizer) recipe_path = os.path.join(self.save_dir, "recipe.yaml") From 7662699bd6dfb565d9227752ea54fbd8face281a Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 11:46:53 -0500 Subject: [PATCH 38/41] clean up and fix format Signed-off-by: Dan Huang --- tests/e2e/vLLM/run_tests_in_rhaiis.sh | 2 -- tests/e2e/vLLM/test_vllm.py | 12 +++--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/e2e/vLLM/run_tests_in_rhaiis.sh b/tests/e2e/vLLM/run_tests_in_rhaiis.sh index 2c1867f428..6f30028f21 100644 --- a/tests/e2e/vLLM/run_tests_in_rhaiis.sh +++ b/tests/e2e/vLLM/run_tests_in_rhaiis.sh @@ -57,8 +57,6 @@ do { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE fi - #{ cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR"; } > $CONFIG_FILE - echo "=== RUNNING MODEL: $CONFIG_FILE ===" cat $CONFIG_FILE diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 5b963a641b..d9937312ca 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -1,4 +1,3 @@ - import os import re import shutil @@ -121,10 +120,10 @@ def compress_model(self, test_data_file: str): self._check_session_contains_recipe() def save_compressed_model(self): - logger.info("================= SAVING TO DISK ======================") - self._save_compressed_model(oneshot_model=self.oneshot_model, - tokenizer=self.tokenizer) + self._save_compressed_model( + oneshot_model=self.oneshot_model, tokenizer=self.tokenizer + ) recipe_path = os.path.join(self.save_dir, "recipe.yaml") @@ -158,8 +157,6 @@ def save_compressed_model(self): def test_vllm(self, test_data_file: str): - self.set_up(test_data_file) - self.compress_model(test_data_file) self.save_compressed_model() @@ -215,9 +212,6 @@ def _run_vllm(self, logger): test_file_dir = os.path.dirname(os.path.abspath(__file__)) - logger.info("Run vllm in subprocess.Popen() using python env:") - logger.info(self.vllm_env) - if IS_VLLM_IMAGE: # generate python command to run in the vllm image RUN_SAVE_DIR = os.path.dirname(self.save_dir) From abb6bab271ee067f8f1fb2599cfd246c1b353bcb Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 11:54:31 -0500 Subject: [PATCH 39/41] fix format Signed-off-by: Dan Huang --- tests/e2e/vLLM/test_vllm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index d9937312ca..9e19a1ef19 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -156,7 +156,6 @@ def save_compressed_model(self): ) def test_vllm(self, test_data_file: str): - self.compress_model(test_data_file) self.save_compressed_model() From de58b026a50c88d97b8a9089c0485daa6996c17f Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 25 Nov 2025 12:46:39 -0500 Subject: [PATCH 40/41] rename file to be rhaiis specific Signed-off-by: Dan Huang --- tests/e2e/vLLM/{e2e-smoke.list => rhaiis-e2e-smoke.list} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/e2e/vLLM/{e2e-smoke.list => 
rhaiis-e2e-smoke.list} (100%) diff --git a/tests/e2e/vLLM/e2e-smoke.list b/tests/e2e/vLLM/rhaiis-e2e-smoke.list similarity index 100% rename from tests/e2e/vLLM/e2e-smoke.list rename to tests/e2e/vLLM/rhaiis-e2e-smoke.list From 984a8cc775aace5606ff18a754197b971dc61221 Mon Sep 17 00:00:00 2001 From: Dan Huang Date: Tue, 2 Dec 2025 14:31:59 -0500 Subject: [PATCH 41/41] rename run_tests.sh to run_tests_in_python.sh Signed-off-by: Dan Huang --- tests/e2e/vLLM/{run_tests.sh => run_tests_in_python.sh} | 0 tests/e2e/vLLM/test_vllm.py | 2 +- tests/lmeval/test_lmeval.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename tests/e2e/vLLM/{run_tests.sh => run_tests_in_python.sh} (100%) diff --git a/tests/e2e/vLLM/run_tests.sh b/tests/e2e/vLLM/run_tests_in_python.sh similarity index 100% rename from tests/e2e/vLLM/run_tests.sh rename to tests/e2e/vLLM/run_tests_in_python.sh diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 9e19a1ef19..066affc69e 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -36,7 +36,7 @@ ] -# Will run each test case in its own process through run_tests.sh +# Will run each test case in its own process through run_tests_in_python.sh # emulating vLLM CI testing @requires_gpu(1) @pytest.mark.parametrize( diff --git a/tests/lmeval/test_lmeval.py b/tests/lmeval/test_lmeval.py index a44cd042ff..662e9bb59a 100644 --- a/tests/lmeval/test_lmeval.py +++ b/tests/lmeval/test_lmeval.py @@ -44,7 +44,7 @@ class LmEvalConfig(BaseModel): TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/lm-eval") -# Will run each test case in its own process through run_tests.sh +# Will run each test case in its own process through run_tests_in_python.sh # emulating vLLM CI testing @requires_gpu(1) @pytest.mark.parametrize(
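
Usage sketch (not part of the patches above): with this series applied, the renamed rhaiis-e2e-smoke.list can be driven through the new run_tests_in_rhaiis.sh roughly as follows. Only the -c/-t/-s flags, the VLLM_PYTHON_ENV variable, and the file paths come from the scripts in this series; the runner pod name and the shared save directory below are illustrative assumptions, not values taken from the patches.

# Minimal sketch, assuming the series is applied and a vLLM-image runner pod
# is reachable via kubectl (the pod name below is hypothetical; setting it to
# "same" would instead run vLLM in the current python environment).
export VLLM_PYTHON_ENV=my-vllm-runner-pod
bash tests/e2e/vLLM/run_tests_in_rhaiis.sh \
    -c tests/e2e/vLLM/rhaiis-e2e-smoke.list \
    -t tests/e2e/vLLM/test_vllm.py \
    -s /mnt/shared/e2e-runs   # hypothetical directory; must be visible to both pytest and the pod

The script rewrites each config with a save_dir under the -s directory, exports it as TEST_DATA_FILE, and runs pytest per config, so the chosen save directory has to be one the kubectl-exec'd vLLM image can also read.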