Skip to content

Commit 0f2d197

Browse files
committed
init
Signed-off-by: Sage Moore <sage@neuralmagic.com>
1 parent 19d91ec commit 0f2d197

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

vllm/model_executor/layers/fused_moe/shared_fused_moe.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -28,13 +28,16 @@ def __init__(
         super().__init__(**kwargs)
         self._shared_experts = shared_experts

-        # Disable shared expert overlap if we are not using
+        # Disable shared expert overlap if we are using eplb or not using
         # flashinfer + DP since there is nothing to be gained in this case.
         # Disabling the overlap optimization also prevents the shared experts
         # from being hidden from torch.compile.
         self.use_overlapped = (
             use_overlapped
-            and not (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            and not (
+                self.enable_eplb
+                or (self.use_flashinfer_cutlass_kernels and self.dp_size > 1)
+            )
             and self._shared_experts is not None
         )
```

0 commit comments

Comments
 (0)