15 changes: 10 additions & 5 deletions vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -28,13 +28,18 @@ def __init__(
         super().__init__(**kwargs)
         self._shared_experts = shared_experts
 
-        # Disable shared expert overlap if we are not using
-        # flashinfer + DP since there is nothing to be gained in this case.
-        # Disabling the overlap optimization also prevents the shared experts
-        # from being hidden from torch.compile.
+        # Disable shared expert overlap if we are using eplb, because of
+        # correctness issues, or if using flashinfer with DP, since there
+        # is nothing to be gained in this case. Disabling the overlap
+        # optimization also prevents the shared experts from being hidden
+        # from torch.compile.
         self.use_overlapped = (
             use_overlapped
-            and not (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            and not (
+                # TODO(wentao): find the root cause and remove this condition
+                self.enable_eplb
+                or (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            )
             and self._shared_experts is not None
         )
 
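The gating logic this hunk changes reduces to a small boolean condition. Below is a minimal, self-contained sketch of that condition as a standalone helper, useful for checking the new behaviour in isolation. Note that `resolve_use_overlapped` is a hypothetical function written for illustration, not part of vLLM; only the flag names mirror the attributes visible in the diff.

```python
# Hypothetical helper (not vLLM code) mirroring the gate in the diff above:
# overlap is requested via `use_overlapped`, but it is forced off when EPLB
# is enabled (correctness issues) or when the FlashInfer CUTLASS kernels are
# used together with data parallelism (dp_size > 1), where overlapping the
# shared experts gains nothing. It also requires shared experts to exist.
def resolve_use_overlapped(
    use_overlapped: bool,
    enable_eplb: bool,
    use_flashinfer_cutlass_kernels: bool,
    dp_size: int,
    has_shared_experts: bool,
) -> bool:
    return (
        use_overlapped
        and not (
            enable_eplb
            or (use_flashinfer_cutlass_kernels and dp_size > 1)
        )
        and has_shared_experts
    )


# EPLB enabled: overlap is disabled even though it was requested.
assert resolve_use_overlapped(True, True, False, 1, True) is False
# No EPLB and no FlashInfer + DP: overlap stays enabled.
assert resolve_use_overlapped(True, False, False, 1, True) is True
# FlashInfer CUTLASS kernels with dp_size > 1: overlap is disabled.
assert resolve_use_overlapped(True, False, True, 2, True) is False
```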