Skip to content

Commit 29511eb

Browse files
committed
update execute_model
1 parent 41ebfec commit 29511eb

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

src/parallax/vllm/model_runner.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,34 @@ def __init__(
150150
f"ParallaxVLLMModelRunner initialized: layers [{start_layer}, {end_layer}), "
151151
f"is_first={self.is_first_peer}, is_last={self.is_last_peer}"
152152
)
153+
154+
def execute_model(
    self,
    scheduler_output: Any,
    intermediate_tensors: Optional[Any] = None,
) -> Any:
    """Run the parent ``execute_model`` with pipeline-parallel inputs wired up.

    For vLLM v1, ``intermediate_tensors`` must be set as an instance
    attribute AND passed as a parameter for pipeline parallelism to work.

    Args:
        scheduler_output: Scheduler output, forwarded unchanged to the
            parent implementation.
        intermediate_tensors: Tensors handed over from the previous
            pipeline stage, or ``None`` when there are none. When provided
            they are also stored on ``self.intermediate_tensors``.

    Returns:
        Whatever the parent class's ``execute_model`` returns.
    """
    if intermediate_tensors is not None:
        # The instance attribute is needed for
        # sync_and_slice_intermediate_tensors to work.
        self.intermediate_tensors = intermediate_tensors

        # Resolve the shape defensively: a missing ``tensors`` attribute or
        # a missing 'hidden_states' entry must never turn a debug log line
        # into an AttributeError that aborts the forward pass.
        tensors = getattr(intermediate_tensors, "tensors", None)
        hidden_states = tensors.get("hidden_states") if tensors is not None else None
        hidden_shape = getattr(hidden_states, "shape", "N/A")
        logger.debug(
            "Set intermediate_tensors on model_runner for PP "
            f"(is_first_peer={self.is_first_peer}, "
            f"hidden_states shape={hidden_shape})"
        )

    # Both the instance attribute (set above) and the keyword argument are
    # needed for vLLM v1, so the value is forwarded explicitly as well.
    return super().execute_model(
        scheduler_output=scheduler_output,
        intermediate_tensors=intermediate_tensors,
    )
153181

154182
def _create_kv_cache_config(self, kv_cache_memory_fraction: float = None) -> KVCacheConfig:
155183
logger.debug("Generating KV cache configuration from model...")

0 commit comments

Comments
 (0)