File tree Expand file tree Collapse file tree 2 files changed +11
-1
lines changed Expand file tree Collapse file tree 2 files changed +11
-1
lines changed Original file line number Diff line number Diff line change @@ -1601,6 +1601,14 @@ def __init__(
16011601 self .check ()
16021602 self .print ()
16031603
def _disable_sequence_parallel_moe_if_needed(self, mode_name):
    """
    Turn off sequence parallel MoE when it is enabled together with cudagraph.

    If both ``parallel_config.use_sequence_parallel_moe`` and
    ``graph_opt_config.use_cudagraph`` are truthy, the former is reset to
    ``False`` and a warning is logged. Otherwise nothing is changed.

    Args:
        mode_name: Label of the running mode, interpolated into the warning
            message (callers in this file pass e.g. "Mixed" or
            "PD's decode node").
    """
    parallel_cfg = self.parallel_config

    # Guard clauses: nothing to do unless both features are switched on.
    if not parallel_cfg.use_sequence_parallel_moe:
        return
    if not self.graph_opt_config.use_cudagraph:
        return

    parallel_cfg.use_sequence_parallel_moe = False
    logger.warning(
        f"Sequence parallel MoE does not support {mode_name} mode with cudagraph. "
        "Setting use_sequence_parallel_moe to False."
    )
1611+
16041612 def postprocess (self ):
16051613 """
16061614 calculate some parameters
@@ -1685,10 +1693,12 @@ def postprocess(self):
16851693 logger .info ("Multi-modal models do not support prefix caching when using CUDAGraph!" )
16861694
16871695 if self .scheduler_config .splitwise_role == "mixed" :
1696+ self ._disable_sequence_parallel_moe_if_needed ("Mixed" )
16881697 self .model_config .moe_phase = MoEPhase (phase = "prefill" )
16891698 elif self .scheduler_config .splitwise_role == "prefill" :
16901699 self .model_config .moe_phase = MoEPhase (phase = "prefill" )
16911700 elif self .scheduler_config .splitwise_role == "decode" :
1701+ self ._disable_sequence_parallel_moe_if_needed ("PD's decode node" )
16921702 self .model_config .moe_phase = MoEPhase (phase = "decode" )
16931703 else :
16941704 raise NotImplementedError
Original file line number Diff line number Diff line change @@ -37,7 +37,7 @@ opentelemetry-api>=1.24.0
3737opentelemetry-sdk >= 1.24.0
3838opentelemetry-instrumentation-redis
3939opentelemetry-instrumentation-mysql
40- opentelemetry-distro
40+ opentelemetry-distro
4141opentelemetry-exporter-otlp
4242opentelemetry-instrumentation-fastapi
4343opentelemetry-instrumentation-logging
You can’t perform that action at this time.
0 commit comments