From e8a900b4c5be81c9a1332f401313f3ff2ef6232b Mon Sep 17 00:00:00 2001 From: Weiliang Liu Date: Tue, 11 Nov 2025 08:30:40 +0000 Subject: [PATCH] Fix the issue where running out of requests leaves no parallelism in PP mode with the Ray backend. Signed-off-by: Weiliang Liu --- vllm/v1/core/sched/scheduler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 4323141c435b..5ac3a0d01b98 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -397,7 +397,12 @@ def schedule(self) -> SchedulerOutput: while self.waiting and token_budget > 0: if len(self.running) == self.max_num_running_reqs: break - + if len(scheduled_resumed_reqs) + len(scheduled_new_reqs) >= max( + 1, + self.max_num_running_reqs + // self.parallel_config.pipeline_parallel_size, + ): + break request = self.waiting.peek_request() # KVTransfer: skip request if still waiting for remote kvs.