diff --git a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
index 3e5b35946..a17b55f77 100644
--- a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
+++ b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
@@ -148,5 +148,10 @@ def _parse_transformer_config(transformer_config: dict) -> dict:
         if isinstance(v, (bool, int, str)):
             config[k] = v
         else:
-            config[k] = str(v)
+            # Handle https://github.com/NVIDIA/Model-Optimizer/issues/981 where
+            # hierarchical_context_parallel_sizes: [8, 2] will raise a TypeError.
+            try:
+                config[k] = str(v)
+            except (AttributeError, TypeError):
+                print(f"Warning: TransformerConfig.{k} does not have __repr__ implemented.")
     return config