@@ -31,13 +31,12 @@ def is_quay_image(url: str) -> bool:
 IS_VLLM_IMAGE = False
 IS_VLLM_IMAGE_DEPLOYED = False
 RUN_SAVE_DIR = os.environ.get("RUN_SAVE_DIR", "none")
-#VLLM_VOLUME_MOUNT_DIR=os.environ.get("VLLM_VOLUME_MOUNT_DIR", "/opt/app-root/runs")
-# when using vllm image, needs to save the generated model and vllm command
+# when using a vllm image, the generated model needs to be saved
 if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()):
     IS_VLLM_IMAGE = True
     if not is_quay_image(VLLM_PYTHON_ENV):
         IS_VLLM_IMAGE_DEPLOYED = True
-    assert RUN_SAVE_DIR != "none", "To use vllm image must set RUN_SAVE_DIR too !"
+    assert RUN_SAVE_DIR != "none", "To use a vllm image, RUN_SAVE_DIR must be set!"
 
 TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm")
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
@@ -92,7 +91,6 @@ def set_up(self, test_data_file: str):
         self.max_seq_length = eval_config.get("max_seq_length", 2048)
         # GPU memory utilization - only set if explicitly provided in config
         self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization")
-        #self.is_vllm_image = IS_VLLM_IMAGE
         if VLLM_PYTHON_ENV.lower() == "same":
             self.vllm_env = sys.executable
         else:
@@ -101,7 +99,7 @@ def set_up(self, test_data_file: str):
         if RUN_SAVE_DIR != "none":
             assert Path(RUN_SAVE_DIR).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}"
             self.run_save_dir = RUN_SAVE_DIR
-            # RUN_SAVE_DIR overwrites config save_dir
+            # RUN_SAVE_DIR overwrites config save_dir if specified
             self.save_dir = os.path.join(RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}")
 
         if not self.save_dir:
@@ -112,6 +110,7 @@ def set_up(self, test_data_file: str):
 
         # script to run vllm if using vllm image
         if IS_VLLM_IMAGE:
+            # script file containing vllm commands to run in the image
             self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
             logger.info(f"vllm bash save dir: {self.vllm_bash}")
 
@@ -126,9 +125,6 @@ def test_vllm(self, test_data_file: str):
         # Run vLLM with saved model
 
         self.set_up(test_data_file)
-        # not need this anymore?
-        #if not self.save_dir:
-        #    self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
             model=self.model,
             model_class=self.model_class,
@@ -221,9 +217,6 @@ def _run_vllm(self, logger):
         import subprocess
 
         llm_kwargs = {"model": self.save_dir}
-        #if IS_VLLM_IMAGE:
-        #    llm_kwargs = {"model":
-        #        self.save_dir.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR)}
 
         if self.gpu_memory_utilization is not None:
             llm_kwargs["gpu_memory_utilization"] = self.gpu_memory_utilization
@@ -238,7 +231,6 @@ def _run_vllm(self, logger):
         logger.info(self.vllm_env)
 
         if IS_VLLM_IMAGE:
-            #run_file_path = os.path.join(VLLM_VOLUME_MOUNT_DIR, "run_vllm.py")
             run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
             shutil.copy(os.path.join(test_file_dir, "run_vllm.py"),
                         os.path.join(RUN_SAVE_DIR, "run_vllm.py"))
@@ -253,38 +245,24 @@ def _run_vllm(self, logger):
             """)
             os.chmod(self.vllm_bash, 0o755)
             logger.info(f"Wrote vllm cmd into {self.vllm_bash}:")
-            logger.info(vllm_cmd)
             if IS_VLLM_IMAGE_DEPLOYED:
                 logger.info("vllm image is deployed. Run vllm cmd with kubectl.")
-                cmds = [f"kubectl exec -it {VLLM_PYTHON_ENV} -n arc-runners",
-                        f"-- /bin/bash {RUN_SAVE_DIR}/run-vllm.bash"]
-                kubectl_cmd = " ".join(cmds)
-                logger.info(f"kubectl command: {kubectl_cmd}")
                 result = subprocess.Popen(
                     [
                         "kubectl", "exec", "-it",
                         VLLM_PYTHON_ENV, "-n", "arc-runners",
-                        "--", "/bin/bash", f"{RUN_SAVE_DIR}/run-vllm.bash",
+                        "--", "/bin/bash", self.vllm_bash,
                     ],
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True)
             else:
-                cmds = ["podman run --rm --device nvidia.com/gpu=all --entrypoint",
-                        #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR),
-                        self.vllm_bash,
-                        "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}",
-                        f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}",
-                        VLLM_PYTHON_ENV]
-                podman_cmd = " ".join(cmds)
-                logger.info(f"podman command: {podman_cmd}")
+                logger.info("vllm image is pulled. Run vllm cmd with podman.")
                 result = subprocess.Popen(
                     [
                         "podman", "run", "--rm",
                         "--device", "nvidia.com/gpu=all", "--entrypoint",
-                        #self.vllm_bash.replace(RUN_SAVE_DIR, VLLM_VOLUME_MOUNT_DIR),
-                        self.vllm_bash,
-                        "-v", #f"{RUN_SAVE_DIR}:{VLLM_VOLUME_MOUNT_DIR}",
+                        self.vllm_bash, "-v",
                         f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}",
                         VLLM_PYTHON_ENV,
                     ],