Skip to content

Commit b24849a

Browse files
committed
add feature combo guards
Signed-off-by: Qiang Xu <qiangx@nvidia.com>
1 parent 731524a commit b24849a

File tree

1 file changed

+12
-17
lines changed

1 file changed

+12
-17
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor_creator.py

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -351,6 +351,18 @@ def allocation_scope(current_stage: ExecutorMemoryType,
351351
validate_feature_combination(llm_args, model_engine, llm_args.sampler_type)
352352

353353
if llm_args.sm_disagg_config is not None:
354+
if llm_args.cache_transceiver_config is not None:
355+
raise ValueError(
356+
"SM-level disaggregation is not compatible with disaggregated serving."
357+
)
358+
if llm_args.parallel_config.world_size > 1:
359+
raise NotImplementedError(
360+
"SM-level disaggregation is not supported with parallelism.")
361+
if scheduler_config.capacity_scheduler_policy != CapacitySchedulerPolicy.GUARANTEED_NO_EVICT:
362+
raise NotImplementedError(
363+
"SM-level disaggregation is only supported with guaranteed no evict scheduler policy."
364+
)
365+
354366
with allocation_scope(ExecutorMemoryType.MODEL_ENGINE_CTX,
355367
RestoreMode.PINNED):
356368
ctx_llm_args = copy.copy(llm_args)
@@ -367,23 +379,6 @@ def allocation_scope(current_stage: ExecutorMemoryType,
367379
else:
368380
ctx_model_engine = None
369381

370-
if llm_args.sm_disagg_config is not None:
371-
with allocation_scope(ExecutorMemoryType.MODEL_ENGINE_CTX,
372-
RestoreMode.PINNED):
373-
ctx_backend_config = copy.copy(pytorch_backend_config)
374-
ctx_backend_config.use_cuda_graph = False
375-
ctx_model_engine = PyTorchModelEngine(
376-
model_path=checkpoint_dir,
377-
llm_args=llm_args,
378-
mapping=mapping,
379-
attn_runtime_features=attn_runtime_features,
380-
dist=dist,
381-
spec_config=spec_config,
382-
weight_sharing_model=model_engine.model,
383-
)
384-
else:
385-
ctx_model_engine = None
386-
387382
if has_draft_model_engine:
388383
with allocation_scope(ExecutorMemoryType.MODEL_ENGINE_DRAFT,
389384
RestoreMode.PINNED):

0 commit comments

Comments
 (0)