diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh index 3cc9decdf..2cbd10b9c 100644 --- a/benchmarks/benchmark_lib.sh +++ b/benchmarks/benchmark_lib.sh @@ -23,6 +23,18 @@ check_env_vars() { fi } +# Setup torch profiler environment when ENABLE_PROFILE=true +# IMPORTANT: This runs when the library is sourced (before server starts) +# so that the server can detect VLLM_TORCH_PROFILER_DIR and enable profiler endpoints +if [ "${ENABLE_PROFILE:-}" = "true" ]; then + # Only set default directory if VLLM_TORCH_PROFILER_DIR is not already set + if [ -z "${VLLM_TORCH_PROFILER_DIR:-}" ]; then + export VLLM_TORCH_PROFILER_DIR="/workspace/profiling" + fi + mkdir -p "$VLLM_TORCH_PROFILER_DIR" + echo "Torch profiler enabled. Output directory: $VLLM_TORCH_PROFILER_DIR" +fi + # Wait for server to be ready by polling the health endpoint # All parameters are required # Parameters: @@ -106,6 +118,7 @@ wait_for_server_ready() { # --result-dir: Result directory # --use-chat-template: Optional flag to enable chat template # --server-pid: Optional server process ID to monitor during benchmark +# --enable-profile: Optional flag to enable torch profiler run_benchmark_serving() { set +x local model="" @@ -121,6 +134,7 @@ run_benchmark_serving() { local workspace_dir="" local use_chat_template=false local server_pid="" + local enable_profile=false while [[ $# -gt 0 ]]; do case $1 in @@ -176,6 +190,10 @@ run_benchmark_serving() { server_pid="$2" shift 2 ;; + --enable-profile) + enable_profile=true + shift + ;; *) echo "Unknown parameter: $1" return 1 @@ -255,6 +273,11 @@ run_benchmark_serving() { benchmark_cmd+=(--use-chat-template) fi + # Add --profile if torch profiler is enabled (via --enable-profile flag or ENABLE_PROFILE env var) + if [[ "$enable_profile" == true ]] || [[ "${ENABLE_PROFILE:-}" == "true" ]]; then + benchmark_cmd+=(--profile) + fi + # Run benchmark with optional server monitoring set -x if [[ -n "$server_pid" ]]; then diff --git a/runners/launch_b200-dgxc.sh 
b/runners/launch_b200-dgxc.sh index f0dbf2107..4a8c4b22f 100644 --- a/runners/launch_b200-dgxc.sh +++ b/runners/launch_b200-dgxc.sh @@ -32,6 +32,7 @@ docker run --rm --init --network host --name $server_name \ -e NCCL_GRAPH_REGISTER=0 \ -e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \ -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e RUN_EVAL -e RUNNER_TYPE \ +-e ENABLE_PROFILE -e VLLM_TORCH_PROFILER_DIR \ --entrypoint=/bin/bash \ $(echo "$IMAGE" | sed 's/#/\//') \ benchmarks/"${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}.sh" diff --git a/runners/launch_h100-cr.sh b/runners/launch_h100-cr.sh index 976d5a5fd..eeeb42c25 100644 --- a/runners/launch_h100-cr.sh +++ b/runners/launch_h100-cr.sh @@ -12,6 +12,7 @@ docker run --rm --network=host --name=$server_name \ -v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \ -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e RUN_EVAL -e RUNNER_TYPE -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e PORT=$PORT \ -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e TORCH_CUDA_ARCH_LIST="9.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \ +-e ENABLE_PROFILE -e VLLM_TORCH_PROFILER_DIR \ --entrypoint=/bin/bash \ $IMAGE \ benchmarks/"${EXP_NAME%%_*}_${PRECISION}_h100.sh" diff --git a/runners/launch_mi300x-amd.sh b/runners/launch_mi300x-amd.sh index cc70d5bbc..f88c470d1 100644 --- a/runners/launch_mi300x-amd.sh +++ b/runners/launch_mi300x-amd.sh @@ -15,6 +15,7 @@ docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \ -v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \ -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \ -e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL -e RUNNER_TYPE \ +-e ENABLE_PROFILE -e VLLM_TORCH_PROFILER_DIR \ --entrypoint=/bin/bash \ $IMAGE \ 
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi300x.sh" diff --git a/runners/launch_mi300x-cr.sh b/runners/launch_mi300x-cr.sh index b2dbaee83..0ef826467 100644 --- a/runners/launch_mi300x-cr.sh +++ b/runners/launch_mi300x-cr.sh @@ -15,6 +15,7 @@ docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \ -v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \ -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \ -e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL -e RUNNER_TYPE \ +-e ENABLE_PROFILE -e VLLM_TORCH_PROFILER_DIR \ --entrypoint=/bin/bash \ $IMAGE \ benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi300x.sh"