@@ -1,3 +1,4 @@
+
 import os
 import re
 import shutil
2122 "TEST_DATA_FILE" , "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml"
2223)
2324SKIP_HF_UPLOAD = os .environ .get ("SKIP_HF_UPLOAD" , "" )
24- # vllm environment: same (default), the path of vllm virtualenv, deployed runner name
25+ # vllm python environment
2526VLLM_PYTHON_ENV = os .environ .get ("VLLM_PYTHON_ENV" , "same" )
2627IS_VLLM_IMAGE = False
27- RUN_SAVE_DIR = os .environ .get ("RUN_SAVE_DIR" , "none" )
28- # when using vllm image, needs to save the generated model
2928if VLLM_PYTHON_ENV .lower () != "same" and (not Path (VLLM_PYTHON_ENV ).exists ()):
3029 IS_VLLM_IMAGE = True
31- assert RUN_SAVE_DIR != "none" , "To use vllm image, RUN_SAVE_DIR must be set!"
32-
3330TIMINGS_DIR = os .environ .get ("TIMINGS_DIR" , "timings/e2e-test_vllm" )
3431os .environ ["VLLM_WORKER_MULTIPROC_METHOD" ] = "spawn"
3532EXPECTED_SAVED_FILES = [
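Note: the env-var plumbing above encodes a three-way dispatch on VLLM_PYTHON_ENV: "same" reuses the current interpreter, an existing filesystem path is treated as a separate virtualenv's python, and anything else is taken to be a deployed vllm image/runner. A minimal sketch of that dispatch (the function name and return shape are illustrative, not from the PR):

```python
import os
import sys
from pathlib import Path

def resolve_vllm_env(value: str) -> tuple[str, bool]:
    """Map a VLLM_PYTHON_ENV value to (python_executable, is_vllm_image)."""
    if value.lower() == "same":
        return sys.executable, False  # reuse the test process's interpreter
    if Path(value).exists():
        return value, False  # path to a separate virtualenv's python
    return value, True  # neither: treat it as a deployed vllm image/runner

# e.g. VLLM_PYTHON_ENV=same -> (sys.executable, False)
python_exe, is_image = resolve_vllm_env(os.environ.get("VLLM_PYTHON_ENV", "same"))
```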
@@ -83,32 +80,18 @@ def set_up(self, test_data_file: str):
         self.max_seq_length = eval_config.get("max_seq_length", 2048)
         # GPU memory utilization - only set if explicitly provided in config
         self.gpu_memory_utilization = eval_config.get("gpu_memory_utilization")
-        if VLLM_PYTHON_ENV.lower() == "same":
-            self.vllm_env = sys.executable
-        else:
+        # vllm python env - if same, use the current python env, otherwise use
+        # the python passed in VLLM_PYTHON_ENV
+        if VLLM_PYTHON_ENV.lower() != "same":
             self.vllm_env = VLLM_PYTHON_ENV
-
-        if RUN_SAVE_DIR != "none":
-            assert Path(
-                RUN_SAVE_DIR
-            ).exists(), f"RUN_SAVE_DIR path doesn't exist: {RUN_SAVE_DIR}"
-            self.run_save_dir = RUN_SAVE_DIR
-            # RUN_SAVE_DIR overwrites config save_dir if specified
-            self.save_dir = os.path.join(
-                RUN_SAVE_DIR, self.model.split("/")[1] + f"-{self.scheme}"
-            )
+        else:
+            self.vllm_env = sys.executable
 
         if not self.save_dir:
             self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
 
         logger.info("========== RUNNING ==============")
-        logger.info(f"model save dir: {self.save_dir}")
-
-        # script to run vllm if using vllm image
-        if IS_VLLM_IMAGE:
-            # script file containing vllm commands to run in the image
-            self.vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
-            logger.info(f"vllm bash save dir: {self.vllm_bash}")
+        logger.info(self.save_dir)
 
         self.prompts = [
             "The capital of France is",
@@ -117,9 +100,7 @@ def set_up(self, test_data_file: str):
         ]
         self.api = HfApi()
 
-    def test_vllm(self, test_data_file: str):
-        # Run vLLM with saved model
-
+    def compress_model(self, test_data_file: str):
         self.set_up(test_data_file)
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
             model=self.model,
@@ -133,12 +114,16 @@ def test_vllm(self, test_data_file: str):
             recipe=self.recipe,
             quant_type=self.quant_type,
         )
+        self.oneshot_model = oneshot_model
+        self.tokenizer = tokenizer
 
         # check that session contains recipe
         self._check_session_contains_recipe()
 
+    def save_compressed_model(self):
+
         logger.info("================= SAVING TO DISK ======================")
-        self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer)
+        self._save_compressed_model(oneshot_model=self.oneshot_model, tokenizer=self.tokenizer)
 
         recipe_path = os.path.join(self.save_dir, "recipe.yaml")
 
@@ -153,8 +138,7 @@ def test_vllm(self, test_data_file: str):
             fp.write(recipe_yaml_str)
         session.reset()
 
-        # if vllm image is used, don't upload
-        if SKIP_HF_UPLOAD.lower() != "yes" and not IS_VLLM_IMAGE:
+        if SKIP_HF_UPLOAD.lower() != "yes":
             logger.info("================= UPLOADING TO HUB ======================")
 
             stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e"
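With the IS_VLLM_IMAGE guard dropped, the upload path now depends only on SKIP_HF_UPLOAD. A hedged sketch of that step using huggingface_hub (the create_repo call is an assumption about repo preparation; the diff itself only shows the stub and a folder_path argument):

```python
from huggingface_hub import HfApi

def upload_compressed_model(save_dir: str, hub_namespace: str) -> str:
    """Push the saved model folder to the Hub under <namespace>/<save_dir>-e2e."""
    api = HfApi()
    stub = f"{hub_namespace}/{save_dir}-e2e"
    api.create_repo(repo_id=stub, exist_ok=True)  # assumption: repo may not exist yet
    api.upload_folder(repo_id=stub, folder_path=save_dir)
    return stub
```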
@@ -171,27 +155,22 @@ def test_vllm(self, test_data_file: str):
                 folder_path=self.save_dir,
             )
 
+    def test_vllm(self):
+        # Run vLLM with saved model
+
         if IS_VLLM_IMAGE:
-            logger.info("========== To run vLLM with vllm image ==========")
+            logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========")
+        elif VLLM_PYTHON_ENV.lower() == "same":
+            logger.info("========== RUNNING vLLM in the same python env ==========")
         else:
-            if VLLM_PYTHON_ENV.lower() == "same":
-                logger.info("========== RUNNING vLLM in the same python env ==========")
-            else:
-                logger.info(
-                    "========== RUNNING vLLM in a separate python env =========="
-                )
+            logger.info("========== RUNNING vLLM in a separate python env ==========")
 
         self._run_vllm(logger)
 
         self.tear_down()
 
     def tear_down(self):
-        # model save_dir is needed for vllm image testing
-        if (
-            not IS_VLLM_IMAGE
-            and self.save_dir is not None
-            and os.path.isdir(self.save_dir)
-        ):
+        if self.save_dir is not None and os.path.isdir(self.save_dir):
             shutil.rmtree(self.save_dir)
 
         timer = get_singleton_manager()
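The net effect of the refactor is that the old monolithic test_vllm is now three steps. A hypothetical driver showing the intended call order (the class name here is assumed from the surrounding test module, and the driver itself is not part of the PR):

```python
def run_e2e(test_data_file: str) -> None:
    t = TestvLLM()  # assumed class name; the diff shows only its methods
    t.compress_model(test_data_file)  # oneshot-compress, stash model/tokenizer on self
    t.save_compressed_model()  # write weights + recipe.yaml, optionally upload to the Hub
    t.test_vllm()  # run vLLM against the saved model, then tear_down()
```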
@@ -229,11 +208,12 @@ def _run_vllm(self, logger):
 
         test_file_dir = os.path.dirname(os.path.abspath(__file__))
 
-        logger.info("Run vllm using env:")
+        logger.info("Run vllm in subprocess.Popen() using python env:")
         logger.info(self.vllm_env)
 
         if IS_VLLM_IMAGE:
             # generate python command to run in the vllm image
+            RUN_SAVE_DIR = os.path.dirname(self.save_dir)
             run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
             shutil.copy(
                 os.path.join(test_file_dir, "run_vllm.py"),
@@ -247,16 +227,17 @@ def _run_vllm(self, logger):
247227 f"'{ json_prompts } '" ,
248228 ]
249229 vllm_cmd = " " .join (cmds )
250- with open (self .vllm_bash , "w" ) as cf :
230+ vllm_bash = os .path .join (RUN_SAVE_DIR , "run-vllm.bash" )
231+ with open (vllm_bash , "w" ) as cf :
251232 cf .write (
252233 f"""#!/bin/bash
253234 export HF_HUB_OFFLINE=0
254235 export VLLM_NO_USAGE_STATS=1
255236 { vllm_cmd }
256237 """
257238 )
258- os .chmod (self . vllm_bash , 0o755 )
259- logger .info (f"Wrote vllm cmd into { self . vllm_bash } :" )
239+ os .chmod (vllm_bash , 0o755 )
240+ logger .info (f"Wrote vllm cmd into { vllm_bash } :" )
260241 logger .info ("vllm image. Run vllm cmd with kubectl." )
261242 result = subprocess .Popen (
262243 [
@@ -268,7 +249,7 @@ def _run_vllm(self, logger):
                     "arc-runners",
                     "--",
                     "/bin/bash",
-                    self.vllm_bash,
+                    vllm_bash,
                 ],
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
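Putting the image branch together: RUN_SAVE_DIR is now derived from save_dir instead of its own env var, a run-vllm.bash wrapper is written next to the saved model, and the script is executed inside the runner pod. A self-contained sketch of that flow, assuming a "kubectl exec <pod> -n arc-runners -- /bin/bash <script>" invocation and a placeholder pod name (the diff shows only the tail of the argument list and the "arc-runners" namespace):

```python
import os
import subprocess

def run_in_vllm_image(save_dir: str, vllm_cmd: str, pod: str = "vllm-runner"):
    """Write a bash wrapper for vllm_cmd and run it inside the runner pod."""
    run_save_dir = os.path.dirname(save_dir)
    vllm_bash = os.path.join(run_save_dir, "run-vllm.bash")
    with open(vllm_bash, "w") as cf:
        cf.write(
            "#!/bin/bash\n"
            "export HF_HUB_OFFLINE=0\n"
            "export VLLM_NO_USAGE_STATS=1\n"
            f"{vllm_cmd}\n"
        )
    os.chmod(vllm_bash, 0o755)
    # assumption: pod name and exec form; the script path must be visible
    # inside the pod (e.g. via a shared volume)
    proc = subprocess.Popen(
        ["kubectl", "exec", pod, "-n", "arc-runners", "--", "/bin/bash", vllm_bash],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return proc.communicate()
```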