From 0f2d1978615fef98fc217719cf0776989483fe6e Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Mon, 10 Nov 2025 03:09:17 +0000
Subject: [PATCH 1/5] init

Signed-off-by: Sage Moore
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index 6b4a0b8cf073..01f8513dccaf 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -28,13 +28,16 @@ def __init__(
         super().__init__(**kwargs)
         self._shared_experts = shared_experts

-        # Disable shared expert overlap if we are not using
+        # Disable shared expert overlap if we are using eplb or not using
         # flashinfer + DP since there is nothing to be gained in this case.
         # Disabling the overlap optimization also prevents the shared experts
         # from being hidden from torch.compile.
         self.use_overlapped = (
             use_overlapped
-            and not (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            and not (
+                self.enable_eplb
+                or (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            )
             and self._shared_experts is not None
         )

From cc5d3b7f0e6045f251e3a96fe29106a7fa319d91 Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Mon, 10 Nov 2025 03:10:30 +0000
Subject: [PATCH 2/5] fix comment

Signed-off-by: Sage Moore
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index 01f8513dccaf..b73738d953a6 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -28,10 +28,11 @@ def __init__(
         super().__init__(**kwargs)
         self._shared_experts = shared_experts

-        # Disable shared expert overlap if we are using eplb or not using
-        # flashinfer + DP since there is nothing to be gained in this case.
-        # Disabling the overlap optimization also prevents the shared experts
-        # from being hidden from torch.compile.
+        # Disable shared expert overlap if we are using eplb, because there
+        # are correctness issues, or not using flashinfer + DP since there
+        # is nothing to be gained in this case. Disabling the overlap
+        # optimization also prevents the shared experts from being hidden
+        # from torch.compile.
         self.use_overlapped = (
             use_overlapped
             and not (

From cf88eef40f4faf6174a887c7e9a1a2c6dbd2381b Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Sun, 9 Nov 2025 19:34:40 -0800
Subject: [PATCH 3/5] Update vllm/model_executor/layers/fused_moe/shared_fused_moe.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Sage Moore
Signed-off-by: Sage Moore
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index b73738d953a6..c33fe604a613 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -28,8 +28,8 @@ def __init__(
         super().__init__(**kwargs)
         self._shared_experts = shared_experts

-        # Disable shared expert overlap if we are using eplb, because there
-        # are correctness issues, or not using flashinfer + DP since there
+        # Disable shared expert overlap if we are using eplb, because of
+        # correctness issues, or if using flashinfer with DP, since there
         # is nothing to be gained in this case. Disabling the overlap
         # optimization also prevents the shared experts from being hidden
         # from torch.compile.

From 87bef6c0b10f46610e9e901b9290df6810f79f62 Mon Sep 17 00:00:00 2001
From: Sage Moore
Date: Mon, 10 Nov 2025 07:29:01 -0800
Subject: [PATCH 4/5] Update vllm/model_executor/layers/fused_moe/shared_fused_moe.py

Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: Sage Moore
Signed-off-by: Sage Moore
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index c33fe604a613..a79c00ad6f16 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -36,6 +36,7 @@ def __init__(
         self.use_overlapped = (
             use_overlapped
             and not (
+                TODO(wentao): find the root cause and remove this condition
                 self.enable_eplb
                 or (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
             )

From bd2c8466e6f2103d0503da4e849ec1c2bb98b7b4 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Mon, 10 Nov 2025 10:39:01 -0500
Subject: [PATCH 5/5] Update vllm/model_executor/layers/fused_moe/shared_fused_moe.py

Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: Sage Moore
---
 vllm/model_executor/layers/fused_moe/shared_fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index a79c00ad6f16..3d0c5636d6c0 100644
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -36,7 +36,7 @@ def __init__(
         self.use_overlapped = (
             use_overlapped
             and not (
-                TODO(wentao): find the root cause and remove this condition
+                # TODO(wentao): find the root cause and remove this condition
                 self.enable_eplb
                 or (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
             )
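
Note on the net effect of the series: the sketch below restates, as standalone Python, the guard that PATCH 5/5 converges on. It is a reconstruction from the hunks above, not a verbatim copy of shared_fused_moe.py; the helper name and flat argument list are hypothetical, since in vLLM this logic sits inline in SharedFusedMoE.__init__ and the flags come from the FusedMoE base class.

# Hedged sketch: a hypothetical helper mirroring the final use_overlapped guard.
def should_overlap_shared_experts(
    use_overlapped: bool,
    enable_eplb: bool,
    use_flashinfer_cutlass_kernels: bool,
    dp_size: int,
    has_shared_experts: bool,
) -> bool:
    return (
        use_overlapped
        # TODO(wentao): find the root cause and remove this condition
        # (per PATCH 2/5, overlap with EPLB currently has correctness issues;
        # with flashinfer cutlass kernels + DP there is nothing to be gained).
        and not (
            enable_eplb
            or (use_flashinfer_cutlass_kernels and dp_size > 1)
        )
        and has_shared_experts
    )

# Example: enabling EPLB disables the overlap even when everything else allows it.
assert not should_overlap_shared_experts(
    use_overlapped=True,
    enable_eplb=True,
    use_flashinfer_cutlass_kernels=False,
    dp_size=1,
    has_shared_experts=True,
)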