diff --git a/tests/e2e/vLLM/rhaiis-e2e-smoke.list b/tests/e2e/vLLM/rhaiis-e2e-smoke.list
new file mode 100644
index 0000000000..d66618aaec
--- /dev/null
+++ b/tests/e2e/vLLM/rhaiis-e2e-smoke.list
@@ -0,0 +1,7 @@
+fp4_nvfp4.yaml
+fp8_dynamic_per_token.yaml
+kv_cache_gptq_tinyllama.yaml
+sparse2of4_fp8_dynamic.yaml
+w4a16_grouped_quant_asym_awq.yaml
+w4a16_actorder_weight.yaml
+int8_channel_weight_static_per_tensor_act.yaml
diff --git a/tests/e2e/vLLM/run_tests_in_rhaiis.sh b/tests/e2e/vLLM/run_tests_in_rhaiis.sh
new file mode 100644
index 0000000000..6f30028f21
--- /dev/null
+++ b/tests/e2e/vLLM/run_tests_in_rhaiis.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+usage() {
+    echo "Usage: $0 -c <config file or dir> -t <test file> -s <save dir>"
+    exit 1
+}
+
+while getopts "c:t:s:" OPT; do
+  case ${OPT} in
+    c )
+        CONFIG="$OPTARG"
+        ;;
+    t )
+        TEST="$OPTARG"
+        ;;
+    s )
+        SAVE_DIR="$OPTARG"
+        ;;
+    \? )
+        exit 1
+        ;;
+  esac
+done
+
+if [[ -z "$CONFIG" || -z "$TEST" || -z "$SAVE_DIR" ]]; then
+    echo "Error: -c, -t, and -s are required."
+    usage
+fi
+
+script_path=$(dirname "${BASH_SOURCE[0]}")
+if [ -d "$CONFIG" ]; then
+    echo "Config is provided as a folder: $CONFIG"
+    CONFIGS=`ls "$CONFIG"`
+elif [ -f "$CONFIG" ]; then
+    echo "Config is provided as a file: $CONFIG"
+    CONFIGS=`cat "$CONFIG"`
+fi
+
+SUCCESS=0
+
+# Parse list of configs and add save_dir
+rm -rf $SAVE_DIR/configs
+mkdir -p $SAVE_DIR/configs
+for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|")
+do
+    FILE_NAME=$(basename $MODEL_CONFIG)
+    CONFIG_FILE=$SAVE_DIR/configs/$FILE_NAME
+
+    save_dir=$(cat $MODEL_CONFIG | grep 'save_dir:' | cut -d' ' -f2)
+    model=$(cat $MODEL_CONFIG | grep 'model:' | cut -d'/' -f2)
+    scheme=$(cat $MODEL_CONFIG | grep 'scheme:' | cut -d' ' -f2)
+
+    # add or overwrite save_dir for each model
+    if [[ -z "$save_dir" ]]; then
+        { cat $MODEL_CONFIG; echo -e "\nsave_dir: $SAVE_DIR/$model-$scheme"; } > $CONFIG_FILE
+    else
+        { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE
+    fi
+
+    echo "=== RUNNING MODEL: $CONFIG_FILE ==="
+    cat $CONFIG_FILE
+
+    LOCAL_SUCCESS=0
+    export TEST_DATA_FILE="$CONFIG_FILE"
+    pytest \
+        --capture=tee-sys \
+        "$TEST" || LOCAL_SUCCESS=$?
+
+    if [[ $LOCAL_SUCCESS == 0 ]]; then
+        echo "=== PASSED MODEL: $CONFIG_FILE ==="
+    else
+        echo "=== FAILED MODEL: $CONFIG_FILE ==="
+    fi
+
+    SUCCESS=$((SUCCESS + LOCAL_SUCCESS))
+
+done
+
+exit "$SUCCESS"
diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 9c099a5aea..9e19a1ef19 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -23,6 +23,9 @@
 SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
 # vllm python environment
 VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same")
+IS_VLLM_IMAGE = False
+if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()):
+    IS_VLLM_IMAGE = True
 TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm")
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 EXPECTED_SAVED_FILES = [
@@ -96,12 +99,8 @@ def set_up(self, test_data_file: str):
         ]
         self.api = HfApi()
 
-    def test_vllm(self, test_data_file: str):
-        # Run vLLM with saved model
-
+    def compress_model(self, test_data_file: str):
         self.set_up(test_data_file)
-        if not self.save_dir:
-            self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
             model=self.model,
             model_class=self.model_class,
@@ -114,12 +113,17 @@ def test_vllm(self, test_data_file: str):
             recipe=self.recipe,
             quant_type=self.quant_type,
         )
+        self.oneshot_model = oneshot_model
+        self.tokenizer = tokenizer
 
         # check that session contains recipe
         self._check_session_contains_recipe()
 
+    def save_compressed_model(self):
         logger.info("================= SAVING TO DISK ======================")
-        self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer)
+        self._save_compressed_model(
+            oneshot_model=self.oneshot_model, tokenizer=self.tokenizer
+        )
 
         recipe_path = os.path.join(self.save_dir, "recipe.yaml")
 
@@ -151,7 +155,15 @@
                 folder_path=self.save_dir,
             )
 
-        if VLLM_PYTHON_ENV.lower() == "same":
+    def test_vllm(self, test_data_file: str):
+        self.compress_model(test_data_file)
+
+        self.save_compressed_model()
+
+        # Run vLLM with saved model
+        if IS_VLLM_IMAGE:
+            logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========")
+        elif VLLM_PYTHON_ENV.lower() == "same":
             logger.info("========== RUNNING vLLM in the same python env ==========")
         else:
             logger.info("========== RUNNING vLLM in a separate python env ==========")
@@ -198,17 +210,68 @@ def _run_vllm(self, logger):
         json_prompts = json.dumps(self.prompts)
 
         test_file_dir = os.path.dirname(os.path.abspath(__file__))
-        run_file_path = os.path.join(test_file_dir, "run_vllm.py")
-        logger.info("Run vllm in subprocess.Popen() using python env:")
-        logger.info(self.vllm_env)
+        if IS_VLLM_IMAGE:
+            # generate python command to run in the vllm image
+            RUN_SAVE_DIR = os.path.dirname(self.save_dir)
+            run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
+            shutil.copy(
+                os.path.join(test_file_dir, "run_vllm.py"),
+                os.path.join(RUN_SAVE_DIR, "run_vllm.py"),
+            )
+            cmds = [
+                "python",
+                run_file_path,
+                f"'{json_scheme}'",
+                f"'{json_llm_kwargs}'",
+                f"'{json_prompts}'",
+            ]
+            vllm_cmd = " ".join(cmds)
+            vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
+            with open(vllm_bash, "w") as cf:
+                cf.write(
+                    f"""#!/bin/bash
+                    export HF_HUB_OFFLINE=0
+                    export VLLM_NO_USAGE_STATS=1
+                    {vllm_cmd}
+                    """
+                )
+            os.chmod(vllm_bash, 0o755)
+            logger.info(f"Wrote vllm cmd into {vllm_bash}")
+            logger.info("Running vllm cmd in the vllm image with kubectl.")
+            result = subprocess.Popen(
+                [
+                    "kubectl",
+                    "exec",
+                    "-it",
+                    VLLM_PYTHON_ENV,
+                    "-n",
+                    "arc-runners",
+                    "--",
+                    "/bin/bash",
+                    vllm_bash,
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+            )
+        else:
+            run_file_path = os.path.join(test_file_dir, "run_vllm.py")
+            logger.info("Run vllm in subprocess.Popen using python env:")
+            logger.info(self.vllm_env)
+            result = subprocess.Popen(
+                [
+                    self.vllm_env,
+                    run_file_path,
+                    json_scheme,
+                    json_llm_kwargs,
+                    json_prompts,
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+            )
 
-        result = subprocess.Popen(
-            [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-        )
         stdout, stderr = result.communicate()
         logger.info(stdout)