16 | 16 | from tests.testing_utils import requires_gpu |
17 | 17 |
18 | 18 |
19 | | -def is_quay_image(url: str) -> bool: |
20 | | - pattern = r"^quay\.io/[a-z0-9][a-z0-9-_]*/[a-z0-9][a-z0-9-_/]*:[\w][\w.-]*$" |
21 | | - return re.match(pattern, url) is not None |
22 | | - |
23 | 19 | HF_MODEL_HUB_NAME = "nm-testing" |
24 | 20 |
25 | 21 | TEST_DATA_FILE = os.environ.get( |
26 | 22 | "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml" |
27 | 23 | ) |
28 | 24 | SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "") |
29 | | -# vllm environment: same (default), the path of vllm virtualenv, image url, deployed runner name |
| 25 | +# vllm environment: "same" (default), the path of a vllm virtualenv, or the name of the deployed runner pod
30 | 26 | VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same") |
31 | 27 | IS_VLLM_IMAGE = False |
32 | | -IS_VLLM_IMAGE_DEPLOYED=False |
33 | 28 | RUN_SAVE_DIR=os.environ.get("RUN_SAVE_DIR", "none") |
34 | 29 | # when using vllm image, needs to save the generated model |
35 | 30 | if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()): |
36 | 31 | IS_VLLM_IMAGE = True |
37 | | - if not is_quay_image(VLLM_PYTHON_ENV): |
38 | | - IS_VLLM_IMAGE_DEPLOYED = True |
39 | | - assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" |
| 32 | + assert RUN_SAVE_DIR != "none", "To use vllm image, RUN_SAVE_DIR must be set!" |
40 | 33 |
41 | 34 | TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm") |
42 | 35 | os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" |
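
For reference, a minimal standalone sketch of how the environment selection behaves after this change. The logic mirrors the module-level code above; the helper name `resolve_vllm_env` is hypothetical and added only for illustration.

```python
import os
from pathlib import Path

def resolve_vllm_env() -> str:
    """Hypothetical helper mirroring the module-level logic in this diff.

    Returns one of:
      - "same":  run vllm in the current python environment (default)
      - "venv":  VLLM_PYTHON_ENV points at an existing vllm virtualenv
      - "image": VLLM_PYTHON_ENV names the deployed runner; vllm is invoked
                 through kubectl, and RUN_SAVE_DIR must be set so the
                 generated model is saved where the runner can read it
    """
    vllm_python_env = os.environ.get("VLLM_PYTHON_ENV", "same")
    run_save_dir = os.environ.get("RUN_SAVE_DIR", "none")

    if vllm_python_env.lower() == "same":
        return "same"
    if Path(vllm_python_env).exists():
        return "venv"
    # Neither "same" nor a local path: treat it as the deployed runner name.
    assert run_save_dir != "none", "To use vllm image, RUN_SAVE_DIR must be set!"
    return "image"
```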
@@ -246,30 +239,16 @@ def _run_vllm(self, logger): |
246 | 239 | """) |
247 | 240 | os.chmod(self.vllm_bash, 0o755) |
248 | 241 | logger.info(f"Wrote vllm cmd into {self.vllm_bash}:") |
249 | | - if IS_VLLM_IMAGE_DEPLOYED: |
250 | | - logger.info("vllm image is deployed. Run vllm cmd with kubectl.") |
251 | | - result = subprocess.Popen( |
252 | | - [ |
253 | | - "kubectl", "exec", "-it", |
254 | | - VLLM_PYTHON_ENV, "-n", "arc-runners", |
255 | | - "--", "/bin/bash", self.vllm_bash, |
256 | | - ], |
257 | | - stdout=subprocess.PIPE, |
258 | | - stderr=subprocess.PIPE, |
259 | | - text=True) |
260 | | - else: |
261 | | - logger.info("vllm image is pulled locally. Run vllm cmd with podman.") |
262 | | - result = subprocess.Popen( |
263 | | - [ |
264 | | - "podman", "run", "--rm", |
265 | | - "--device", "nvidia.com/gpu=all", "--entrypoint", |
266 | | - self.vllm_bash, "-v", |
267 | | - f"{RUN_SAVE_DIR}:{RUN_SAVE_DIR}", |
268 | | - VLLM_PYTHON_ENV, |
269 | | - ], |
270 | | - stdout=subprocess.PIPE, |
271 | | - stderr=subprocess.PIPE, |
272 | | - text=True) |
| 242 | +            logger.info("Using vllm image. Running vllm cmd with kubectl.")
| 243 | + result = subprocess.Popen( |
| 244 | + [ |
| 245 | + "kubectl", "exec", "-it", |
| 246 | + VLLM_PYTHON_ENV, "-n", "arc-runners", |
| 247 | + "--", "/bin/bash", self.vllm_bash, |
| 248 | + ], |
| 249 | + stdout=subprocess.PIPE, |
| 250 | + stderr=subprocess.PIPE, |
| 251 | + text=True) |
273 | 252 | else: |
274 | 253 | run_file_path = os.path.join(test_file_dir, "run_vllm.py") |
275 | 254 | logger.info("Run vllm in subprocess.Popen using python env:") |
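
For context, a minimal sketch of the `kubectl exec` pattern the image branch now relies on. The pod name and script path below are placeholders (the test derives them from VLLM_PYTHON_ENV and the generated `self.vllm_bash` script); this is an illustration of the call shape, not the test's implementation.

```python
import subprocess

POD_NAME = "vllm-runner-pod"      # placeholder for VLLM_PYTHON_ENV
SCRIPT_PATH = "/tmp/run_vllm.sh"  # placeholder for self.vllm_bash

# Run the generated script inside the runner pod in the arc-runners
# namespace and capture its output, as the test above does.
proc = subprocess.Popen(
    ["kubectl", "exec", "-it", POD_NAME, "-n", "arc-runners",
     "--", "/bin/bash", SCRIPT_PATH],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
stdout, stderr = proc.communicate()  # block until the remote run finishes
if proc.returncode != 0:
    raise RuntimeError(f"kubectl exec failed ({proc.returncode}):\n{stderr}")
print(stdout)
```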