Skip to content

Commit 6356a77

Browse files
authored
Merge branch 'PaddlePaddle:develop' into coverage_combine_check
2 parents aa27516 + 6584ee9 commit 6356a77

35 files changed

+5019
-3538
lines changed

.github/workflows/_accuracy_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ jobs:
143143
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
144144
-e TZ="Asia/Shanghai" \
145145
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
146-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
146+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
147147
148148
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
149149

.github/workflows/_base_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ jobs:
143143
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
144144
-e TZ="Asia/Shanghai" \
145145
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
146-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
146+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
147147
148148
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
149149

.github/workflows/_build_linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ jobs:
155155
elif [[ "${PADDLEVERSION}" != "" ]];then
156156
python -m pip install paddlepaddle-gpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
157157
else
158-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
158+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
159159
fi
160160
161161
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_logprob_test_linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ jobs:
134134
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
135135
-e TZ="Asia/Shanghai" \
136136
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
137-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
137+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
138138
139139
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
140140

.github/workflows/_pre_ce_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ jobs:
151151
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
152152
git config --global --add safe.directory /workspace/FastDeploy
153153
cd FastDeploy
154-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
154+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
155155
python -m pip install ${fd_wheel_url}
156156
bash scripts/run_pre_ce.sh
157157
'

.github/workflows/_stable_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ jobs:
146146
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
147147
-e TZ="Asia/Shanghai" \
148148
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
149-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
149+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
150150
151151
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
152152

.github/workflows/_unit_test_coverage.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ jobs:
176176
git config --global --add safe.directory /workspace/FastDeploy
177177
cd FastDeploy
178178
git diff origin/${BASE_REF}..HEAD --unified=0 > diff.txt
179-
python -m pip install paddlepaddle-gpu==3.3.0.dev20251112 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
179+
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
180180
pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
181181
182182
python -m pip install -r scripts/unittest_requirement.txt

.github/workflows/rerun.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,16 @@ jobs:
5656
REPO: ${{ github.event.repository.name }}
5757
JOB_NAME: 'CI_XPU'
5858

59+
- name: Rerun Check PR Template
60+
if: ${{ contains(github.event.comment.body, 'check_pr_template') }}
61+
uses: ./.github/actions/rerun-workflow
62+
with:
63+
PR_ID: ${{ github.event.issue.number }}
64+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
65+
OWNER: ${{ github.repository_owner }}
66+
REPO: ${{ github.event.repository.name }}
67+
JOB_NAME: 'Check PR Template'
68+
5969
- name: Rerun Codestyle-check
6070
if: ${{ contains(github.event.comment.body, 'codestyle') || contains(github.event.comment.body, 'pre_commit') }}
6171
uses: ./.github/actions/rerun-workflow

custom_ops/gpu_ops/append_attn/append_attention_func.cuh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2296,6 +2296,9 @@ __global__ void merge_multi_chunks_decoder_kernel(
22962296
const int bid = blockIdx.x, hid = blockIdx.y;
22972297
__shared__ T smem[bdy * HEAD_DIM];
22982298
__shared__ float md_smem[bdy * 2];
2299+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
2300+
cudaGridDependencySynchronize();
2301+
#endif
22992302
const int start_token_idx = cu_seqlens_q[bid];
23002303
const int seq_len_q = seq_lens_q[bid];
23012304
if (seq_len_q == 0) return;
@@ -2332,6 +2335,10 @@ __global__ void merge_multi_chunks_decoder_kernel(
23322335
} else if constexpr (std::is_same<T, __nv_bfloat16>::value) {
23332336
m = -3.0e+30f;
23342337
}
2338+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
2339+
cudaGridDependencySynchronize();
2340+
#endif
2341+
23352342
#pragma unroll 2
23362343
for (int i = ty; i < num_chunks_this_seq; i += bdy) {
23372344
uint32_t offset = (bid * num_chunks + i) * num_heads + hid;
@@ -2397,6 +2404,9 @@ __global__ void merge_multi_chunks_decoder_kernel(
23972404
out_vec,
23982405
&out[(start_token_idx * num_heads + hid) * head_dim + vid * vec_size]);
23992406
}
2407+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
2408+
cudaTriggerProgrammaticLaunchCompletion();
2409+
#endif
24002410
}
24012411

24022412
template <typename T,
@@ -2433,6 +2443,9 @@ __global__ void merge_multi_chunks_v2_kernel(
24332443
const int hid = blockIdx.y;
24342444
__shared__ T smem[bdy * HEAD_DIM];
24352445
__shared__ float md_smem[bdy * 2];
2446+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
2447+
cudaGridDependencySynchronize();
2448+
#endif
24362449
for (int qid = blockIdx.x; qid < token_num; qid += gridDim.x) {
24372450
const uint32_t bid = batch_id_per_token[qid];
24382451
if (bid == -1) {
@@ -2569,4 +2582,7 @@ __global__ void merge_multi_chunks_v2_kernel(
25692582
}
25702583
__syncthreads();
25712584
}
2585+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
2586+
cudaTriggerProgrammaticLaunchCompletion();
2587+
#endif
25722588
}

0 commit comments

Comments
 (0)