Skip to content

Commit 4fed2b9

Browse files
committed
revise note
1 parent bd1db25 commit 4fed2b9

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

fastdeploy/worker/xpu_model_runner.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -420,7 +420,7 @@ def only_decode(self):
420420
prefill_exists = None
421421
# mix ep in single node
422422
if self.fd_config.parallel_config.use_ep and self.fd_config.scheduler_config.splitwise_role == "mixed":
423-
# 在ep场景下no_need_stop如果都是F,表示全部卡空闲,返回false,走高吞吐分支,否则为部分卡空闲,需要进一步判断
423+
# 在ep场景下no_need_stop如果都是false,表示全部卡空闲,返回false,走高吞吐分支,否则为部分卡空闲,需要进一步判断
424424
no_need_stop_list = []
425425
no_need_stops = self.not_need_stop()
426426
paddle.distributed.all_gather_object(no_need_stop_list, not no_need_stops)
@@ -930,7 +930,9 @@ def _prepare_inputs(self, is_dummy_run=False) -> None:
930930
self.initialize_attention_backend()
931931

932932
if_only_decode = self.only_decode()
933-
if self.fd_config.scheduler_config.splitwise_role == "mixed": # 混合式默认初始化为prefill
933+
if (
934+
self.fd_config.scheduler_config.splitwise_role == "mixed"
935+
): # 集中式场景,phase默认初始化为prefill, 推理运行时不同类型的batch能够在此处实现phase切换
934936
self.fd_config.model_config.moe_phase.phase = "decode" if if_only_decode else "prefill"
935937

936938
# Get sampling metadata

0 commit comments

Comments
 (0)