Commit 625c9db

clean up
Signed-off-by: Dan Huang <dahuang@redhat.com>
1 parent d23bdf4 commit 625c9db

File tree: 1 file changed (+7, -29 lines)


tests/e2e/vLLM/test_vllm.py

Lines changed: 7 additions & 29 deletions
@@ -31,13 +31,12 @@ def is_quay_image(url: str) -> bool:
 IS_VLLM_IMAGE = False
 IS_VLLM_IMAGE_DEPLOYED=False
 RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none")
-#VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs")
-# when using vllm image, needs to save the generated model and vllm command
+# when using vllm image, needs to save the generated model
 if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()):
     IS_VLLM_IMAGE = True
     if not is_quay_image(VLLM_PYTHON_ENV):
         IS_VLLM_IMAGE_DEPLOYED = True
-    assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too!"
+    assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!"

 TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm")
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
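
For reviewers, the flags above select between three ways the test obtains a vLLM environment. A minimal sketch of the corresponding VLLM_PYTHON_ENV settings (all concrete values below are hypothetical placeholders, not taken from this repository):

    import os

    # 1. "same": reuse the current Python interpreter (sys.executable).
    os.environ["VLLM_PYTHON_ENV"] = "same"

    # 2. A quay image reference: IS_VLLM_IMAGE is set, the image is pulled and
    #    run with podman, and RUN_SAVE_DIR must point at an existing directory.
    os.environ["VLLM_PYTHON_ENV"] = "quay.io/example/vllm:latest"  # hypothetical image
    os.environ["RUN_SAVE_DIR"] = "/tmp/vllm-runs"                  # hypothetical path

    # 3. Any other value that is not an existing path is treated as the name of
    #    an already-deployed pod: IS_VLLM_IMAGE_DEPLOYED is set and the generated
    #    script is executed with kubectl exec.
    os.environ["VLLM_PYTHON_ENV"] = "vllm-runner-pod"              # hypothetical pod name
    os.environ["RUN_SAVE_DIR"] = "/tmp/vllm-runs"                  # required in this mode too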
@@ -92,7 +91,6 @@ def set_up(self, test_data_file: str):
         self.max_seq_length = eval_config.get("max_seq_length", 2048)
         # GPU memory utilization - only set if explicitly provided in config
         self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization")
-        #self.is_vllm_image = IS_VLLM_IMAGE
         if VLLM_PYTHON_ENV.lower() == "same":
             self.vllm_env = sys.executable
         else:
@@ -101,7 +99,7 @@ def set_up(self, test_data_file: str):
         if RUN_SAVE_DIR != "none":
             assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}"
             self.run_save_dir = RUN_SAVE_DIR
-            # RUN_SAVE_DIR overwrites config save_dir
+            # RUN_SAVE_DIR overwrites config save_dir if specified
             self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}")

         if not self.save_dir:
@@ -112,6 +110,7 @@ def set_up(self, test_data_file: str):

         # script to run vllm if using vllm image
         if IS_VLLM_IMAGE:
+            # script file containing vllm commands to run in the image
             self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
             logger.info(f"vllm bash save dir: {self.vllm_bash}")

@@ -126,9 +125,6 @@ def test_vllm(self, test_data_file: str):
         # Run vLLM with saved model

         self.set_up(test_data_file)
-        # not need this anymore?
-        #if not self.save_dir:
-        #    self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
             model=self.model,
             model_class=self.model_class,
@@ -221,9 +217,6 @@ def _run_vllm(self, logger):
         import subprocess

         llm_kwargs = {"model": self.save_dir}
-        #if IS_VLLM_IMAGE:
-        #    llm_kwargs = {"model":
-        #        self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)}

         if self.gpu_memory_utilization is not None:
             llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization
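
The llm_kwargs built above presumably feed vLLM's LLM constructor further down in this test; a minimal sketch of that usage under that assumption, with a hypothetical model path:

    from vllm import LLM

    # Hypothetical quantized-model directory saved by the oneshot step.
    llm_kwargs = {"model": "/tmp/vllm-runs/example-model-W8A8"}
    llm_kwargs["gpu_memory_utilization"] = 0.9  # only added when the config sets it
    llm = LLM(**llm_kwargs)
    outputs = llm.generate(["Hello, my name is"])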
@@ -238,7 +231,6 @@ def _run_vllm(self, logger):
         logger.info(self.vllm_env)

         if IS_VLLM_IMAGE:
-            #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py")
             run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
             shutil.copy(os.path.join(test_file_dir, "run_vllm.py"),
                         os.path.join(RUN_SAVE_DIR, "run_vllm.py"))
@@ -253,38 +245,24 @@ def _run_vllm(self, logger):
             """)
             os.chmod(self.vllm_bash, 0o755)
             logger.info(f"Wrote vllm cmd into {self.vllm_bash}:")
-            logger.info(vllm_cmd)
             if IS_VLLM_IMAGE_DEPLOYED:
                 logger.info("vllm image is deployed. Run vllm cmd with kubectl.")
-                cmds = [f"kubectl exec -it {VLLM_PYTHON_ENV} -n arc-runners",
-                        f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"]
-                kubectl_cmd = " ".join(cmds)
-                logger.info(f"kubectl command: {kubectl_cmd}")
                 result = subprocess.Popen(
                     [
                         "kubectl", "exec", "-it",
                         VLLM_PYTHON_ENV, "-n", "arc-runners",
-                        "--", "/bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash",
+                        "--", "/bin/bash", self.vllm_bash,
                     ],
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True)
             else:
-                cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint",
-                        #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR),
-                        self.vllm_bash,
-                        "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}",
-                        f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}",
-                        VLLM_PYTHON_ENV]
-                podman_cmd = " ".join(cmds)
-                logger.info(f"podman command: {podman_cmd}")
+                logger.info("vllm image is pulled. Run vllm cmd with podman.")
                 result = subprocess.Popen(
                     [
                         "podman", "run", "--rm",
                         "--device", "nvidia.com/gpu=all", "--entrypoint",
-                        #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR),
-                        self.vllm_bash,
-                        "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}",
+                        self.vllm_bash, "-v",
                         f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}",
                         VLLM_PYTHON_ENV,
                     ],
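
Since this change drops the joined command strings that were previously logged, the two subprocess.Popen calls above correspond roughly to the following invocations. A sketch with hypothetical placeholder values standing in for VLLM_PYTHON_ENV, RUN_SAVE_DIR, and self.vllm_bash:

    run_save_dir = "/tmp/vllm-runs"              # hypothetical RUN_SAVE_DIR
    vllm_bash = f"{run_save_dir}/run-vllm.bash"  # script written by the test
    env_ref = "vllm-runner-pod"                  # VLLM_PYTHON_ENV: pod name or image

    # Deployed image: run the generated script inside the existing pod.
    kubectl_argv = ["kubectl", "exec", "-it", env_ref, "-n", "arc-runners",
                    "--", "/bin/bash", vllm_bash]

    # Pulled image: mount RUN_SAVE_DIR into the container and use the script
    # as the entrypoint.
    podman_argv = ["podman", "run", "--rm", "--device", "nvidia.com/gpu=all",
                   "--entrypoint", vllm_bash,
                   "-v", f"{run_save_dir}:{run_save_dir}", env_ref]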
