InfiniTensor
diff --git a/‎test/infiniop/causal_softmax.py‎
Lines changed: 96 additions & 83 deletions b/‎test/infiniop/causal_softmax.py‎
Lines changed: 96 additions & 83 deletions
@@ -1,26 +1,50 @@
-from ctypes import POINTER, Structure, c_int32, c_uint64, c_void_p
+import torch
 import ctypes
-import sys
-import os
-
-
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
-from operatorspy import (
-    open_lib,
-    to_tensor,
-    DeviceEnum,
+from ctypes import POINTER, Structure, c_int32, c_size_t, c_uint64, c_void_p, c_float
+from libinfiniop import (
     infiniopHandle_t,
     infiniopTensorDescriptor_t,
-    create_handle,
-    destroy_handle,
+    open_lib,
+    to_tensor,
+    get_test_devices,
     check_error,
-    rearrange_tensor,
+    rearrange_if_needed,
     create_workspace,
+    test_operator,
+    get_args,
+    debug,
+    get_tolerance,
+    profile_operation,
 )
 
-from operatorspy.tests.test_utils import get_args
-import torch
+# ==============================================================================
+#  Configuration (Internal Use Only)
+# ==============================================================================
+# These are not meant to be imported from other modules
 
+# Each entry pairs an input shape with an optional stride tuple; test() forwards
+# the stride to rearrange_if_needed. A stride of None presumably keeps the
+# default contiguous layout (confirm against libinfiniop).
+_TEST_CASES = [
+        # x_shape, x_stride
+        ((32, 512), None),
+        ((32, 512), (1024, 1)),
+        ((32, 5, 5), None),
+        ((32, 20, 512), None),
+        ((32, 20, 512), (20480, 512, 1)),  # Ascend does not support non-contiguous tensors yet
+        ((32, 20, 4, 512), None),
+        ((32, 20, 4, 512), (81920, 2048, 512, 1)),
+    ]
+# Data types used for testing
+_TENSOR_DTYPES = [torch.float16, torch.float32]
+
+# Tolerance map for different data types
+# test() resolves (atol, rtol) from this map via get_tolerance and passes them to torch.allclose.
+_TOLERANCE_MAP = {
+    torch.float16: {'atol': 0, 'rtol': 1e-2},
+    torch.float32: {'atol': 0, 'rtol': 1e-3},
+}
+
+# Runtime options. These defaults are overwritten from the parsed command-line
+# arguments in the __main__ block; NUM_PRERUN and NUM_ITERATIONS are forwarded
+# to profile_operation when PROFILE is enabled.
+DEBUG = False
+PROFILE = False
+NUM_PRERUN = 10
+NUM_ITERATIONS = 1000
 
 class CausalSoftmaxDescriptor(Structure):
     _fields_ = [("device", c_int32)]
@@ -37,88 +61,78 @@ def causal_softmax(x):
     return torch.nn.functional.softmax(masked, dim=-1).to(type)
 
 
-def test(lib, handle, torch_device, x_shape, x_stride=None, x_dtype=torch.float16):
+def test(
+    lib, 
+    handle, 
+    torch_device, 
+    x_shape, 
+    x_stride=None, 
+    dtype=torch.float16
+):
+    """Run a single CausalSoftmax test case and assert it matches PyTorch.
+
+    A random tensor is created, the reference result is computed with
+    ``causal_softmax``, the library operator is invoked on the same tensor,
+    and the tensor is then compared against the reference with
+    ``torch.allclose`` using the tolerances looked up in ``_TOLERANCE_MAP``.
+
+    Args:
+        lib: Library object exposing the ``infiniop*CausalSoftmax*`` functions.
+        handle: Handle passed to ``infiniopCreateCausalSoftmaxDescriptor``.
+        torch_device: Device the input tensor is moved to.
+        x_shape: Shape of the random input tensor.
+        x_stride: Strides forwarded to ``rearrange_if_needed``; defaults to None.
+        dtype: Torch dtype of the input tensor.
+
+    Raises:
+        AssertionError: If the tensor differs from the reference beyond the
+            configured tolerance.
+    """
     print(
-        f"Testing CausalSoftmax on {torch_device} with x_shape:{x_shape} x_stride:{x_stride} dtype:{x_dtype}"
+        f"Testing CausalSoftmax on {torch_device} with x_shape:{x_shape} x_stride:{x_stride} dtype:{dtype}"
     )
-    x = torch.rand(x_shape, dtype=x_dtype).to(torch_device)
-    if x_stride is not None:
-        x = rearrange_tensor(x, x_stride)
+    x = torch.rand(x_shape, dtype=dtype).to(torch_device)
+
+    # Compute the reference before calling the library: x itself is compared
+    # against ans afterwards, so the operator is expected to overwrite x in place.
     ans = causal_softmax(x)
+
+    
+    x = rearrange_if_needed(x, x_stride)
+    
     x_tensor = to_tensor(x, lib)
+
     descriptor = infiniopCausalSoftmaxDescriptor_t()
     check_error(
         lib.infiniopCreateCausalSoftmaxDescriptor(
-            handle, ctypes.byref(descriptor), x_tensor.descriptor
+            handle, 
+            ctypes.byref(descriptor),
+            x_tensor.descriptor
         )
     )
+
+    # Invalidate the shape and strides in the descriptor to prevent them from being directly used by the kernel
+    x_tensor.descriptor.contents.invalidate()
+
+
     workspace_size = c_uint64(0)
     check_error(
         lib.infiniopGetCausalSoftmaxWorkspaceSize(
             descriptor, ctypes.byref(workspace_size)
         )
     )
-
-    # Invalidate the shape and strides in the descriptor to prevent them from being directly used by the kernel
-    x_tensor.descriptor.contents.invalidate()
-
     workspace = create_workspace(workspace_size.value, x.device)
-    check_error(
-        lib.infiniopCausalSoftmax(
-            descriptor,
-            workspace.data_ptr() if workspace is not None else None,
-            workspace_size.value,
-            x_tensor.data,
-            None,
+    # Wrapped in a closure so the identical call can be reused by the profiler below.
+    def lib_causal_softmax():
+        check_error(
+            lib.infiniopCausalSoftmax(
+                descriptor,
+                workspace.data_ptr() if workspace is not None else None,
+                workspace_size.value,
+                x_tensor.data,
+                None,
+            )
         )
-    )
-    assert torch.allclose(x, ans, atol=0, rtol=1e-2)
-    check_error(lib.infiniopDestroyCausalSoftmaxDescriptor(descriptor))
-
-
-def test_cpu(lib, test_cases):
-    device = DeviceEnum.DEVICE_CPU
-    handle = create_handle(lib, device)
-    for x_shape, x_stride in test_cases:
-        test(lib, handle, "cpu", x_shape, x_stride)
-    destroy_handle(lib, handle)
-
-
-def test_cuda(lib, test_cases):
-    device = DeviceEnum.DEVICE_CUDA
-    handle = create_handle(lib, device)
-    for x_shape, x_stride in test_cases:
-        test(lib, handle, "cuda", x_shape, x_stride)
-    destroy_handle(lib, handle)
-
-
-def test_bang(lib, test_cases):
-    import torch_mlu
-
-    device = DeviceEnum.DEVICE_BANG
-    handle = create_handle(lib, device)
-    for x_shape, x_stride in test_cases:
-        test(lib, handle, "mlu", x_shape, x_stride)
-    destroy_handle(lib, handle)
+    lib_causal_softmax()
+    
+    # Tolerances are selected per dtype from _TOLERANCE_MAP.
+    atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
+    if DEBUG:
+        debug(x, ans, atol=atol, rtol=rtol)
+    assert torch.allclose(x, ans, atol=atol, rtol=rtol)
+
+    # Profiling workflow
+    # NOTE(review): by this point x has presumably been overwritten by the operator,
+    # so both timed calls run on the post-softmax values; this affects timing only.
+    if PROFILE:
+        # fmt: off
+        profile_operation("PyTorch", lambda: causal_softmax(x), torch_device, NUM_PRERUN, NUM_ITERATIONS)
+        profile_operation("    lib", lambda: lib_causal_softmax(), torch_device, NUM_PRERUN, NUM_ITERATIONS)
+        # fmt: on
 
+    check_error(lib.infiniopDestroyCausalSoftmaxDescriptor(descriptor))
 
-def test_ascend(lib, test_cases):
-    import torch_npu
 
-    device = DeviceEnum.DEVICE_ASCEND
-    handle = create_handle(lib, device)
-    for x_shape, x_stride in test_cases:
-        test(lib, handle, "npu", x_shape, x_stride)
 
-    destroy_handle(lib, handle)
 
 
 if __name__ == "__main__":
-    test_cases = [
-        # x_shape, x_stride
-        ((32, 20, 512), None),
-        ((32, 20, 512), (20480, 512, 1)),  # Ascend 暂不支持非连续
-    ]
+    
     args = get_args()
     lib = open_lib()
     lib.infiniopCreateCausalSoftmaxDescriptor.restype = c_int32
@@ -144,15 +158,14 @@ def test_ascend(lib, test_cases):
     lib.infiniopDestroyCausalSoftmaxDescriptor.argtypes = [
         infiniopCausalSoftmaxDescriptor_t,
     ]
+    # Configure testing options
+    DEBUG = args.debug
+    PROFILE = args.profile
+    NUM_PRERUN = args.num_prerun
+    NUM_ITERATIONS = args.num_iterations
+    
+    for device in get_test_devices(args):
+        test_operator(lib, device, test, _TEST_CASES, _TENSOR_DTYPES)
 
-    if args.cpu:
-        test_cpu(lib, test_cases)
-    if args.cuda:
-        test_cuda(lib, test_cases)
-    if args.bang:
-        test_bang(lib, test_cases)
-    if args.ascend:
-        test_ascend(lib, test_cases)
-    if not (args.cpu or args.cuda or args.bang or args.ascend):
-        test_cpu(lib, test_cases)
     print("\033[92mTest passed!\033[0m")
+