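Fix grouped_mm stride issue

- _get_sharded_shape_stride: for a sharded dim, no longer ceil-divide that
  dim's own stride by the mesh size; ceil-divide the stride of the preceding
  dim (dim - 1) instead, and only when dim - 1 > 0.
- estimate_strategy_runtime_cost: remove the try/except that reported
  infinite flops when aten._grouped_mm raised RuntimeError; such errors now
  propagate to the caller.
- convert_element_type_rule: use propagate_single_input_strategy in place of
  default_strategy.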

wconstab · wconstab · commit 1dbcdfe06c5d · 2025-07-17T17:24:48.000-07:00
diff --git a/autoparallel/compute_estimation.py b/autoparallel/compute_estimation.py
@@ -170,9 +170,10 @@ def _get_sharded_shape_stride(spec):
         if placement.is_shard():
             dim = placement.dim
             new_tensor_shape[dim] = (new_tensor_shape[dim] + mesh_size - 1) // mesh_size
-            new_tensor_stride[dim] = (
-                new_tensor_stride[dim] + mesh_size - 1
-            ) // mesh_size
+            if dim - 1 > 0:
+                new_tensor_stride[dim - 1] = (
+                    new_tensor_stride[dim - 1] + mesh_size - 1
+                ) // mesh_size
     return new_tensor_shape, new_tensor_stride
 
 
@@ -213,16 +214,10 @@ def estimate_strategy_runtime_cost(node, strategy):
 
     # TODO: maybe cache the flop_counter to avoid recreating it
     # all the time
-    try:
-        with FlopCounterMode(display=False) as flop_counter:
-            node.target(*args, **kwargs)
-
-        flops = flop_counter.get_total_flops()
-    except RuntimeError as exc:
-        if node.target == torch.ops.aten._grouped_mm.default:
-            flops = float("inf")
-        else:
-            raise exc
+    with FlopCounterMode(display=False) as flop_counter:
+        node.target(*args, **kwargs)
+
+    flops = flop_counter.get_total_flops()
 
     # TODO: fix this
     dtype = strategy.input_specs[0].tensor_meta.dtype
diff --git a/autoparallel/propagation_rules.py b/autoparallel/propagation_rules.py
@@ -508,9 +508,11 @@ def native_layer_norm_backward_rule(mesh, op_schema):
 
 @register_opschema_rule(torch.ops.prims.convert_element_type.default)
 def convert_element_type_rule(mesh, op_schema):
-    from torch.distributed.tensor._ops._tensor_ops import default_strategy
+    from torch.distributed.tensor._ops._tensor_ops import (
+        propagate_single_input_strategy,
+    )
 
-    out_strat = default_strategy(op_schema)
+    out_strat = propagate_single_input_strategy(op_schema)
     return out_strat