PaddlePaddle
diff --git a/‎.github/pull_request_template.md‎
Lines changed: 4 additions & 0 deletions b/‎.github/pull_request_template.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/_base_test.yml‎
Lines changed: 7 additions & 0 deletions b/‎.github/workflows/_base_test.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎.github/workflows/_unit_test_coverage.yml‎
Lines changed: 11 additions & 1 deletion b/‎.github/workflows/_unit_test_coverage.yml‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎.github/workflows/ci_xpu.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/ci_xpu.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎benchmarks/README.md‎
Lines changed: 30 additions & 3 deletions b/‎benchmarks/README.md‎
Lines changed: 30 additions & 3 deletions
diff --git a/‎benchmarks/backend_request_func.py‎
Lines changed: 8 additions & 0 deletions b/‎benchmarks/backend_request_func.py‎
Lines changed: 8 additions & 0 deletions
@@ -6,6 +6,10 @@
 
 <!-- Describe the purpose and goals of this pull request. -->
 
+> :bulb: If this PR is a cherry-pick, its title must start with the [Cherry-Pick] label and end with the original PR ID, for example: [Cherry-Pick][CI] Add check trigger and logic(#5191)
+
+> :bulb: 如若此PR是Cherry Pick，PR标题需遵循格式，在最开始加上[Cherry-Pick]标签，以及最后面加上原PR ID，例如[Cherry-Pick][CI] Add check trigger and logic(#5191) 
+
 ## Modifications
 
 <!-- Detail the changes made in this pull request. -->
 
@@ -206,6 +206,13 @@ jobs:
           check_service 90
           python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1
 
+          export TEMPLATE=TOKEN_NORMAL
+          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
+            -H "Content-Type: application/json" \
+            -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-VL-28B-A3B-Thinking\", \"--reasoning-parser\": \"ernie-45-vl-thinking\", \"--tool-call-parser\": \"ernie-45-vl-thinking\", \"--tensor-parallel-size\": 1, \"--quantization\": \"wint4\", \"--max-model-len\": 131072, \"--max-num-seqs\": 32}"
+          check_service 90
+          python -m pytest -sv test_prompt_ids.py || TEST_EXIT_CODE=1
+
           popd
           echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
           '
 
@@ -105,6 +105,7 @@ jobs:
           FD_CACHE_QUEUE_PORT=$((8098 + DEVICE_PORT * 100))
           FD_ROUTER_PORT=$((8048 + DEVICE_PORT * 100))
           FD_CONNECTOR_PORT=$((8038 + DEVICE_PORT * 100))
+          FD_RDMA_PORT=$((8028 + DEVICE_PORT * 100))
           echo "Test ENV Parameter:"
           echo "========================================================="
           echo "FLASK_PORT=${FLASK_PORT}"
@@ -114,6 +115,7 @@ jobs:
           echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
           echo "FD_ROUTER_PORT=${FD_ROUTER_PORT}"
           echo "FD_CONNECTOR_PORT=${FD_CONNECTOR_PORT}"
+          echo "FD_RDMA_PORT=${FD_RDMA_PORT}"
           echo "DEVICES=${DEVICES}"
           echo "========================================================="
 
@@ -149,9 +151,15 @@ jobs:
             docker rm -f ${runner_name} || true
           fi
 
+          export RDMA_DEVICES=$(find /dev/infiniband/uverbs* -maxdepth 1 -not -type d | xargs -I{} echo '--device {}:{}')
+
           docker run --rm --net=host \
           --name ${runner_name} \
-          --cap-add=SYS_PTRACE --shm-size=64G \
+          --cap-add=SYS_PTRACE --cap-add=IPC_LOCK \
+          --shm-size=64G \
+          ${RDMA_DEVICES} \
+          --device=/dev/infiniband/rdma_cm \
+          --ulimit memlock=-1:-1 \
           -v $(pwd):/workspace -w /workspace \
           -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
           -v "${CACHE_DIR}/.cache:/root/.cache" \
@@ -165,6 +173,8 @@ jobs:
           -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
           -e "FD_ROUTER_PORT=${FD_ROUTER_PORT}" \
           -e "FD_CONNECTOR_PORT=${FD_CONNECTOR_PORT}" \
+          -e "FD_RDMA_PORT=${FD_RDMA_PORT}" \
+          -e "CLEAN_CUDA=1" \
           -e TZ="Asia/Shanghai" \
           -e "fd_wheel_url=${fd_wheel_url}" \
           -e "BASE_REF=${BASE_REF}" \
 
@@ -28,7 +28,7 @@ jobs:
 
       - name: Code Checkout
         env:
-          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.2.0
+          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
         run: |
           REPO="https://github.com/${{ github.repository }}.git"
           FULL_REPO="${{ github.repository }}"
@@ -59,7 +59,7 @@ jobs:
 
       - name: Run CI unittest
         env:
-          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.2.0
+          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:ci
         run: |
           runner_name="${{ runner.name }}"
           last_char="${runner_name: -1}"
 
@@ -58,7 +58,7 @@ python benchmark_serving.py \
   --port 9812 \
   --dataset-name EBChat \
   --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
-  --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
+  --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
   --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
   --metric-percentiles 80,95,99,99.9,99.95,99.99 \
   --num-prompts 1 \
@@ -78,7 +78,7 @@ python benchmark_serving.py \
   --port 9812 \
   --dataset-name EBChat \
   --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
-  --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
+  --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
   --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
   --metric-percentiles 80,95,99,99.9,99.95,99.99 \
   --num-prompts 2000 \
@@ -100,7 +100,7 @@ python benchmark_serving.py \
   --port 9812 \
   --dataset-name EBChat \
   --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
-  --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
+  --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
   --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
   --metric-percentiles 80,95,99,99.9,99.95,99.99 \
   --num-prompts 2000 \
@@ -135,3 +135,30 @@ python benchmarks/benchmark_mtp.py \
 --dataset-name：指定数据集类，指定为"EBChat"可读取转存的FD格式数据集
 --dataset-path：测试数据集路径
 ```
+
+### 指定输入输出长度，构造随机纯文输入测试
+
+相关参数：
+- --dataset-name：指定数据集类，指定为"random"可构造随机纯文输入
+- --random-input-len：随机输入长度，对应英文单词数，默认200
+- --random-output-len：随机输出长度，默认1024
+- --random-range-ratio：输入输出长度变化范围比，[length * (1 - range_ratio), length * (1 + range_ratio)]，默认0.1
+
+#### 使用方式：
+```bash
+python benchmark_serving.py \
+  --backend openai-chat \
+  --model EB45T \
+  --endpoint /v1/chat/completions \
+  --host 0.0.0.0 \
+  --port 9812 \
+  --dataset-name random \
+  --random-input-len 200 \
+  --random-output-len 1024 \
+  --random-range-ratio 0.1 \
+  --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
+  --metric-percentiles 80,95,99,99.9,99.95,99.99 \
+  --num-prompts 2000 \
+  --max-concurrency 100 \
+  --save-result > infer_log.txt 2>&1 &
+```
@@ -52,6 +52,7 @@ class RequestFuncInput:
     language: Optional[str] = None
     debug: bool = False
     response_format: Optional[dict] = None
+    random_flag: bool = False
 
 
 @dataclass
@@ -103,6 +104,13 @@ async def async_request_eb_openai_chat_completions(
         # 超参由yaml传入
         payload.update(request_func_input.hyper_parameters)
 
+        # 随机输入开关
+        if request_func_input.random_flag:
+            payload["max_tokens"] = request_func_input.output_len
+            metadata = payload.get("metadata", {})
+            metadata["min_tokens"] = request_func_input.output_len
+            payload["metadata"] = metadata
+
         if request_func_input.ignore_eos:
             payload["ignore_eos"] = request_func_input.ignore_eos