@@ -150,6 +150,34 @@ def __init__(
150150 f"ParallaxVLLMModelRunner initialized: layers [{ start_layer } , { end_layer } ), "
151151 f"is_first={ self .is_first_peer } , is_last={ self .is_last_peer } "
152152 )
153+
154+ def execute_model (
155+ self ,
156+ scheduler_output : Any ,
157+ intermediate_tensors : Optional [Any ] = None ,
158+ ) -> Any :
159+ """
160+ Execute the model with proper intermediate tensors handling for pipeline parallelism.
161+
162+ For vLLM v1, intermediate_tensors must be set as an instance variable
163+ AND passed as a parameter for proper pipeline parallelism support.
164+ """
165+ # Set intermediate_tensors as instance variable if provided
166+ # This is needed for sync_and_slice_intermediate_tensors to work
167+ if intermediate_tensors is not None :
168+ self .intermediate_tensors = intermediate_tensors
169+ logger .debug (
170+ f"Set intermediate_tensors on model_runner for PP "
171+ f"(is_first_peer={ self .is_first_peer } , "
172+ f"hidden_states shape={ intermediate_tensors .tensors .get ('hidden_states' ).shape if hasattr (intermediate_tensors , 'tensors' ) else 'N/A' } )"
173+ )
174+
175+ # Call parent execute_model with intermediate_tensors parameter
176+ # Both the instance variable and parameter are needed for vLLM v1
177+ return super ().execute_model (
178+ scheduler_output = scheduler_output ,
179+ intermediate_tensors = intermediate_tensors ,
180+ )
153181
154182 def _create_kv_cache_config (self , kv_cache_memory_fraction : float = None ) -> KVCacheConfig :
155183 logger .debug ("Generating KV cache configuration from model..." )
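For orientation, here is a minimal usage sketch of the new override, assuming vLLM's IntermediateTensors container of named tensors and a hypothetical receive_from_prev_peer helper that delivers the previous peer's hidden states; the runner and scheduler_output objects are likewise stand-ins, so this is an illustration rather than code from the commit.

# Usage sketch (not part of the commit): feeding activations received from the
# previous pipeline peer into ParallaxVLLMModelRunner.execute_model.
# `runner`, `scheduler_output`, and `receive_from_prev_peer` are hypothetical.
import torch
from vllm.sequence import IntermediateTensors


def run_step(runner, scheduler_output, receive_from_prev_peer):
    if runner.is_first_peer:
        # The first peer computes embeddings locally, so there is nothing to pass in.
        intermediate = None
    else:
        # Assumed helper returning the previous peer's hidden states,
        # shaped roughly [num_tokens, hidden_size] in vLLM v1.
        hidden_states: torch.Tensor = receive_from_prev_peer()
        # Wrap the activations so execute_model can both store them on the
        # runner and forward them to the parent implementation.
        intermediate = IntermediateTensors({"hidden_states": hidden_states})
    return runner.execute_model(scheduler_output, intermediate_tensors=intermediate)

Passing the tensors both as an attribute and as an argument mirrors the comments in the diff: the attribute is what sync_and_slice_intermediate_tensors reads, while the argument is forwarded to the parent execute_model.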