We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a5bd54a · commit 9a17437 (copy full SHA for 9a17437)
vllm/v1/worker/utils.py
@@ -316,11 +316,7 @@ def bind_kv_cache(
     # TODO - analyze where runner_kv_caches is used and the right
     # way to ensure it properly reflects multiple attention layers
     # in the same decoder block.
-    if (
-        current_platform.is_cuda()
-        or current_platform.is_xpu()
-        or current_platform.is_rocm()
-    ):
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
        # We know that the GPU runner is not impacted by this
        # case. Some test code depends on runner_kv_caches, but
        # not in a way that's impacted by ignoring this.
0 commit comments