From 4147f3be0e75cea3be8aabaf28ee73fb68877325 Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Mon, 17 Nov 2025 22:22:07 +0000
Subject: [PATCH 1/5] Revamp testing.

---
 .github/scripts/container_build.sh            |   5 +-
 .github/scripts/container_exec.sh             |   4 +-
 .github/scripts/container_run.sh              |   3 +-
 .github/scripts/run_tests.sh                  |  47 +++--
 .../iris-external-validation-test.yml         |   3 +
 .../iris-performance-regression-test.yml      |   3 +
 .github/workflows/iris-pip-install-test.yml   | 198 ------------------
 .github/workflows/iris-tests-apptainer.yml    | 111 ----------
 .github/workflows/iris-tests.yml              | 104 +++++++++
 9 files changed, 142 insertions(+), 336 deletions(-)
 delete mode 100644 .github/workflows/iris-pip-install-test.yml
 delete mode 100644 .github/workflows/iris-tests-apptainer.yml
 create mode 100644 .github/workflows/iris-tests.yml

diff --git a/.github/scripts/container_build.sh b/.github/scripts/container_build.sh
index a1f7464a..dfe62d1d 100755
--- a/.github/scripts/container_build.sh
+++ b/.github/scripts/container_build.sh
@@ -36,9 +36,10 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
     
 elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
     echo "[INFO] Checking Docker images..."
-    IMAGE_NAME="iris-dev-triton-aafec41"
+    # Use GitHub variable if set, otherwise default to iris-dev
+    IMAGE_NAME=${DOCKER_IMAGE_NAME:-"iris-dev"}
     
-    # Check if the triton image exists
+    # Check if the image exists
     if docker image inspect "$IMAGE_NAME" &> /dev/null; then
         echo "[INFO] Using existing Docker image: $IMAGE_NAME"
     else
diff --git a/.github/scripts/container_exec.sh b/.github/scripts/container_exec.sh
index 1ef3e327..a02affba 100755
--- a/.github/scripts/container_exec.sh
+++ b/.github/scripts/container_exec.sh
@@ -86,7 +86,9 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
     exit $EXIT_CODE
     
 elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
-    IMAGE_NAME=${CUSTOM_IMAGE:-${DOCKER_IMAGE_NAME:-"iris-dev-triton-aafec41"}}
+    # Use custom image if provided, otherwise use GitHub variable or default
+    # GitHub Actions sets DOCKER_IMAGE_NAME, locally defaults to iris-dev
+    IMAGE_NAME=${CUSTOM_IMAGE:-${DOCKER_IMAGE_NAME:-"iris-dev"}}
     
     if ! docker image inspect "$IMAGE_NAME" &> /dev/null; then
         echo "[ERROR] Docker image $IMAGE_NAME not found" >&2
diff --git a/.github/scripts/container_run.sh b/.github/scripts/container_run.sh
index ce5ffe2e..30b4a535 100755
--- a/.github/scripts/container_run.sh
+++ b/.github/scripts/container_run.sh
@@ -25,7 +25,8 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
     bash apptainer/run.sh "$@"
 elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
     echo "[INFO] Running with Docker..."
-    IMAGE_NAME=${1:-"iris-dev-triton-aafec41"}
+    # Use GitHub variable if set, otherwise default to iris-dev
+    IMAGE_NAME=${1:-${DOCKER_IMAGE_NAME:-"iris-dev"}}
     WORKSPACE_DIR=${2:-"$(pwd)"}
     bash docker/run.sh "$IMAGE_NAME" "$WORKSPACE_DIR"
 fi
diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh
index c74801d1..160de790 100755
--- a/.github/scripts/run_tests.sh
+++ b/.github/scripts/run_tests.sh
@@ -3,16 +3,28 @@
 # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Run Iris tests in a container
-# Usage: run_tests.sh <num_ranks> [gpu_devices]
+# Usage: run_tests.sh <test_dir> <num_ranks> [gpu_devices]
+#   test_dir: subdirectory under tests/ (e.g., examples, unittests, ccl)
+#   num_ranks: number of GPU ranks (1, 2, 4, or 8)
+#   gpu_devices: comma-separated GPU device IDs (optional)
 
 set -e
 
-NUM_RANKS=$1
-GPU_DEVICES=${2:-""}
+TEST_DIR=$1
+NUM_RANKS=$2
+GPU_DEVICES=${3:-""}
 
-if [ -z "$NUM_RANKS" ]; then
-    echo "[ERROR] NUM_RANKS not provided"
-    echo "Usage: $0 <num_ranks> [gpu_devices]"
+if [ -z "$TEST_DIR" ] || [ -z "$NUM_RANKS" ]; then
+    echo "[ERROR] Missing required arguments"
+    echo "Usage: $0 <test_dir> <num_ranks> [gpu_devices]"
+    echo "  test_dir: examples, unittests, or ccl"
+    echo "  num_ranks: 1, 2, 4, or 8"
+    exit 1
+fi
+
+# Validate test directory
+if [ ! -d "tests/$TEST_DIR" ]; then
+    echo "[ERROR] Test directory tests/$TEST_DIR does not exist"
     exit 1
 fi
 
@@ -29,23 +41,12 @@ fi
     set -e
     pip install -e .
     
-    # Run examples tests
-    for test_file in tests/examples/test_*.py; do
-        echo \"Testing: \$test_file with $NUM_RANKS ranks\"
-        python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
+    # Run tests in the specified directory
+    for test_file in tests/$TEST_DIR/test_*.py; do
+        if [ -f \"\$test_file\" ]; then
+            echo \"Testing: \$test_file with $NUM_RANKS ranks\"
+            python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
+        fi
     done
-    
-    # Run unit tests
-    for test_file in tests/unittests/test_*.py; do
-        echo \"Testing: \$test_file with $NUM_RANKS ranks\"
-        python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
-    done
-
-    # Run ccl tests
-    # DISABLED: CCL host-side APIs have issues for some data types/algorithms
-    # for test_file in tests/ccl/test_*.py; do
-    #     echo \"Testing: \$test_file with $NUM_RANKS ranks\"
-    #     python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
-    # done
 "
 
diff --git a/.github/workflows/iris-external-validation-test.yml b/.github/workflows/iris-external-validation-test.yml
index f609fc7a..655d13f6 100644
--- a/.github/workflows/iris-external-validation-test.yml
+++ b/.github/workflows/iris-external-validation-test.yml
@@ -11,6 +11,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
+env:
+  DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}
+
 jobs:
   build-container-image:
     runs-on: [self-hosted, mi3008x]
diff --git a/.github/workflows/iris-performance-regression-test.yml b/.github/workflows/iris-performance-regression-test.yml
index fa017886..137ee2d0 100644
--- a/.github/workflows/iris-performance-regression-test.yml
+++ b/.github/workflows/iris-performance-regression-test.yml
@@ -11,6 +11,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
+env:
+  DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}
+
 jobs:
   build-container-image:
     runs-on: [self-hosted, mi3008x]
diff --git a/.github/workflows/iris-pip-install-test.yml b/.github/workflows/iris-pip-install-test.yml
deleted file mode 100644
index aa7ee86f..00000000
--- a/.github/workflows/iris-pip-install-test.yml
+++ /dev/null
@@ -1,198 +0,0 @@
-name: Iris Pip Install Test
-
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
-
-jobs:
-  build-container-image:
-    runs-on: [self-hosted, mi3008x]
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Setup Apptainer (if not available)
-        run: |
-          if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
-            echo "Neither Apptainer nor Docker found, installing Apptainer..."
-            apt-get update && apt-get install -y software-properties-common
-            add-apt-repository -y ppa:apptainer/ppa
-            apt-get update && apt-get install -y apptainer
-          else
-            echo "Container runtime already available"
-          fi
-
-      - name: Build Iris container
-        run: |
-          # Use the universal container build script
-          bash .github/scripts/container_build.sh
-
-  test-1-2-4-ranks:
-    name: Pip Install Test 1/2/4 Ranks (Parallel)
-    needs: build-container-image
-    runs-on: [self-hosted, mi3008x]
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Cleanup lingering ports before tests
-        run: |
-          bash .github/scripts/cleanup_ports.sh
-
-      - name: Run pip install tests for 1, 2, 4 ranks in parallel
-        run: |
-          # Don't use set -e here - we want to handle errors manually for parallel processes
-          # Run tests in parallel with different GPU assignments
-          # Note: Each test gets 2+ GPUs even if it only uses some of them.
-          # This allows tests like test_empty_device_handling to verify that
-          # allocating on a different device correctly raises an error.
-
-          echo "::group::Starting parallel tests"
-          echo "Starting 1-rank test on GPUs 0,1..."
-          bash .github/scripts/container_exec.sh --gpus "0,1" "
-            set -e
-            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
-            pip install -e .
-            for test_file in tests/examples/test_*.py; do
-              echo \"Testing: \$test_file with 1 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 1 \"\$test_file\" -v --tb=short --durations=10
-            done
-            for test_file in tests/unittests/test_*.py; do
-              echo \"Testing: \$test_file with 1 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 1 \"\$test_file\" -v --tb=short --durations=10
-            done
-          " > /tmp/test_1rank.log 2>&1 &
-          PID1=$!
-
-          echo "Starting 2-rank test on GPUs 2,3..."
-          bash .github/scripts/container_exec.sh --gpus "2,3" "
-            set -e
-            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
-            pip install -e .
-            for test_file in tests/examples/test_*.py; do
-              echo \"Testing: \$test_file with 2 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 2 \"\$test_file\" -v --tb=short --durations=10
-            done
-            for test_file in tests/unittests/test_*.py; do
-              echo \"Testing: \$test_file with 2 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 2 \"\$test_file\" -v --tb=short --durations=10
-            done
-          " > /tmp/test_2rank.log 2>&1 &
-          PID2=$!
-
-          echo "Starting 4-rank test on GPUs 4,5,6,7..."
-          bash .github/scripts/container_exec.sh --gpus "4,5,6,7" "
-            set -e
-            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
-            pip install -e .
-            for test_file in tests/examples/test_*.py; do
-              echo \"Testing: \$test_file with 4 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 4 \"\$test_file\" -v --tb=short --durations=10
-            done
-            for test_file in tests/unittests/test_*.py; do
-              echo \"Testing: \$test_file with 4 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 4 \"\$test_file\" -v --tb=short --durations=10
-            done
-          " > /tmp/test_4rank.log 2>&1 &
-          PID4=$!
-          echo "::endgroup::"
-
-          # Wait for all parallel tests and track failures
-          echo "::group::Waiting for parallel tests to complete"
-          FAIL=0
-          FAILED_TESTS=""
-
-          # Wait for each process and capture exit status
-          if ! wait $PID1; then
-            echo "::error::1-rank test FAILED"
-            echo "::group::1-rank test logs"
-            cat /tmp/test_1rank.log || true
-            echo "::endgroup::"
-            FAILED_TESTS="$FAILED_TESTS 1-rank"
-            FAIL=1
-          else
-            echo "✅ 1-rank test passed"
-          fi
-
-          if ! wait $PID2; then
-            echo "::error::2-rank test FAILED"
-            echo "::group::2-rank test logs"
-            cat /tmp/test_2rank.log || true
-            echo "::endgroup::"
-            FAILED_TESTS="$FAILED_TESTS 2-rank"
-            FAIL=1
-          else
-            echo "✅ 2-rank test passed"
-          fi
-
-          if ! wait $PID4; then
-            echo "::error::4-rank test FAILED"
-            echo "::group::4-rank test logs"
-            cat /tmp/test_4rank.log || true
-            echo "::endgroup::"
-            FAILED_TESTS="$FAILED_TESTS 4-rank"
-            FAIL=1
-          else
-            echo "✅ 4-rank test passed"
-          fi
-          echo "::endgroup::"
-
-          # Clean up log files
-          rm -f /tmp/test_1rank.log /tmp/test_2rank.log /tmp/test_4rank.log
-
-          if [ $FAIL -eq 1 ]; then
-            echo "::error::Parallel tests failed:$FAILED_TESTS"
-            exit 1
-          fi
-
-          echo "✅ All parallel tests (1, 2, 4 ranks) passed!"
-
-  test-8-ranks:
-    name: Pip Install Test 8 Ranks
-    needs: build-container-image
-    runs-on: [self-hosted, mi3008x]
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Cleanup lingering ports before tests
-        run: |
-          bash .github/scripts/cleanup_ports.sh
-
-      - name: Run 8-rank pip install test
-        run: |
-          echo "::group::Running 8-rank test on all GPUs"
-          if bash .github/scripts/container_exec.sh --gpus "0,1,2,3,4,5,6,7" "
-            set -e
-            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
-            pip install -e .
-            for test_file in tests/examples/test_*.py; do
-              echo \"Testing: \$test_file with 8 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 8 \"\$test_file\" -v --tb=short --durations=10
-            done
-            for test_file in tests/unittests/test_*.py; do
-              echo \"Testing: \$test_file with 8 ranks\"
-              python tests/run_tests_distributed.py --num_ranks 8 \"\$test_file\" -v --tb=short --durations=10
-            done
-          "; then
-            echo "::endgroup::"
-            echo "✅ 8-rank test passed!"
-          else
-            echo "::endgroup::"
-            echo "::error::8-rank test FAILED"
-            exit 1
-          fi
diff --git a/.github/workflows/iris-tests-apptainer.yml b/.github/workflows/iris-tests-apptainer.yml
deleted file mode 100644
index 8df639bf..00000000
--- a/.github/workflows/iris-tests-apptainer.yml
+++ /dev/null
@@ -1,111 +0,0 @@
-name: Iris Tests
-
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
-
-jobs:
-  build-container-image:
-    runs-on: [self-hosted, mi3008x]
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Setup Apptainer (if not available)
-        run: |
-          if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
-            echo "Neither Apptainer nor Docker found, installing Apptainer..."
-            apt-get update && apt-get install -y software-properties-common
-            add-apt-repository -y ppa:apptainer/ppa
-            apt-get update && apt-get install -y apptainer
-          else
-            echo "Container runtime already available"
-          fi
-
-      - name: Build Iris container
-        run: |
-          # Use the universal container build script
-          bash .github/scripts/container_build.sh
-  test-1-2-4-ranks:
-    name: Test 1/2/4 Ranks (Parallel)
-    needs: build-container-image
-    runs-on: [self-hosted, mi3008x]
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Cleanup lingering ports before tests
-        run: |
-          bash .github/scripts/cleanup_ports.sh
-
-      - name: Run 1, 2, 4 rank tests in parallel
-        run: |
-          set -e
-
-          # Run tests in parallel with different GPU assignments
-          # Note: Each test gets 2+ GPUs even if it only uses some of them.
-          # This allows tests like test_empty_device_handling to verify that
-          # allocating on a different device correctly raises an error.
-
-          echo "::group::Starting parallel tests"
-          echo "Starting 1-rank test on GPUs 0,1..."
-          bash .github/scripts/run_tests.sh 1 "0,1" &
-          PID1=$!
-
-          echo "Starting 2-rank test on GPUs 2,3..."
-          bash .github/scripts/run_tests.sh 2 "2,3" &
-          PID2=$!
-
-          echo "Starting 4-rank test on GPUs 4,5,6,7..."
-          bash .github/scripts/run_tests.sh 4 "4,5,6,7" &
-          PID4=$!
-          echo "::endgroup::"
-
-          # Wait for all parallel tests and track failures
-          echo "::group::Waiting for parallel tests to complete"
-          FAIL=0
-          FAILED_TESTS=""
-
-          wait $PID1 || { echo "::error::1-rank test FAILED"; FAILED_TESTS="$FAILED_TESTS 1-rank"; FAIL=1; }
-          wait $PID2 || { echo "::error::2-rank test FAILED"; FAILED_TESTS="$FAILED_TESTS 2-rank"; FAIL=1; }
-          wait $PID4 || { echo "::error::4-rank test FAILED"; FAILED_TESTS="$FAILED_TESTS 4-rank"; FAIL=1; }
-          echo "::endgroup::"
-
-          if [ $FAIL -eq 1 ]; then
-            echo "::error::Parallel tests failed:$FAILED_TESTS"
-            exit 1
-          fi
-
-          echo "✅ All parallel tests (1, 2, 4 ranks) passed!"
-
-  test-8-ranks:
-    name: Test 8 Ranks
-    needs: build-container-image
-    runs-on: [self-hosted, mi3008x]
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Cleanup lingering ports before tests
-        run: |
-          bash .github/scripts/cleanup_ports.sh
-
-      - name: Run 8-rank test
-        run: |
-          set -e
-
-          echo "::group::Running 8-rank test on all GPUs"
-          bash .github/scripts/run_tests.sh 8 "0,1,2,3,4,5,6,7"
-          echo "::endgroup::"
-
-          echo "✅ 8-rank test passed!"
diff --git a/.github/workflows/iris-tests.yml b/.github/workflows/iris-tests.yml
new file mode 100644
index 00000000..1fec194b
--- /dev/null
+++ b/.github/workflows/iris-tests.yml
@@ -0,0 +1,104 @@
+name: Iris Tests
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+env:
+  DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}
+
+jobs:
+  build-container-image:
+    runs-on: [self-hosted, mi3008x]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Apptainer (if not available)
+        run: |
+          if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
+            echo "Neither Apptainer nor Docker found, installing Apptainer..."
+            apt-get update && apt-get install -y software-properties-common
+            add-apt-repository -y ppa:apptainer/ppa
+            apt-get update && apt-get install -y apptainer
+          else
+            echo "Container runtime already available"
+          fi
+
+      - name: Build Iris container
+        run: |
+          bash .github/scripts/container_build.sh
+
+  test:
+    name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks)
+    needs: build-container-image
+    runs-on: [self-hosted, mi3008x]
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Test each subdirectory with each rank count
+          - test_dir: examples
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: examples
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: examples
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: examples
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+          - test_dir: unittests
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: unittests
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: unittests
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: unittests
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+          - test_dir: ccl
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: ccl
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: ccl
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: ccl
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Cleanup lingering ports before tests
+        run: |
+          bash .github/scripts/cleanup_ports.sh
+
+      - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks
+        run: |
+          set -e
+          echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks"
+          bash .github/scripts/run_tests.sh \
+            "${{ matrix.test_dir }}" \
+            "${{ matrix.num_ranks }}" \
+            "${{ matrix.gpu_devices }}"
+          echo "::endgroup::"
+          echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks passed!"
+

From 0a197622d2c5b2fbfff046666045d732bf56defa Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Tue, 18 Nov 2025 02:52:49 +0000
Subject: [PATCH 2/5] Use preamble

---
 tests/ccl/test_all_reduce.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tests/ccl/test_all_reduce.py b/tests/ccl/test_all_reduce.py
index 58f88bab..fd4da1e4 100644
--- a/tests/ccl/test_all_reduce.py
+++ b/tests/ccl/test_all_reduce.py
@@ -75,7 +75,16 @@ def test_all_reduce(variant, dtype, M, N):
         config.all_reduce_distribution = 0  # striding
     if variant == "ring":
         config.all_reduce_num_rings = min(2, config.comm_sms)
-    shmem.ccl.all_reduce(iris_output_tensor, iris_input_tensor, config=config)
+    
+    # Explicitly call preamble to ensure proper initialization and synchronization
+    # This helps with test isolation when tests run sequentially
+    workspace = shmem.ccl.all_reduce_preamble(
+        iris_output_tensor, iris_input_tensor, config=config
+    )
+    shmem.barrier()  # Ensure all ranks have completed preamble before starting kernel
+    
+    # Now call all_reduce with the prepared workspace
+    shmem.ccl.all_reduce(iris_output_tensor, iris_input_tensor, config=config, workspace=workspace)
     torch.cuda.synchronize()
 
     # Compare results
@@ -131,7 +140,15 @@ def test_all_reduce_two_shot_distribution(distribution, dtype=torch.float32, M=1
 
     shmem.barrier()
     config = Config(all_reduce_variant="two_shot", all_reduce_distribution=distribution)
-    shmem.ccl.all_reduce(iris_output_tensor, iris_input_tensor, config=config)
+    
+    # Explicitly call preamble to ensure proper initialization and synchronization
+    workspace = shmem.ccl.all_reduce_preamble(
+        iris_output_tensor, iris_input_tensor, config=config
+    )
+    shmem.barrier()  # Ensure all ranks have completed preamble before starting kernel
+    
+    # Now call all_reduce with the prepared workspace
+    shmem.ccl.all_reduce(iris_output_tensor, iris_input_tensor, config=config, workspace=workspace)
     torch.cuda.synchronize()
 
     atol = 1e-5

From 4983672dd4cd254e768ba44f8af7129af6ec409f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Tue, 18 Nov 2025 03:04:16 +0000
Subject: [PATCH 3/5] Apply Ruff auto-fixes

---
 tests/ccl/test_all_reduce.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/tests/ccl/test_all_reduce.py b/tests/ccl/test_all_reduce.py
index fd4da1e4..16501c22 100644
--- a/tests/ccl/test_all_reduce.py
+++ b/tests/ccl/test_all_reduce.py
@@ -75,14 +75,12 @@ def test_all_reduce(variant, dtype, M, N):
         config.all_reduce_distribution = 0  # striding
     if variant == "ring":
         config.all_reduce_num_rings = min(2, config.comm_sms)
-    
+
     # Explicitly call preamble to ensure proper initialization and synchronization
     # This helps with test isolation when tests run sequentially
-    workspace = shmem.ccl.all_reduce_preamble(
-        iris_output_tensor, iris_input_tensor, config=config
-    )
+    workspace = shmem.ccl.all_reduce_preamble(iris_output_tensor, iris_input_tensor, config=config)
     shmem.barrier()  # Ensure all ranks have completed preamble before starting kernel
-    
+
     # Now call all_reduce with the prepared workspace
     shmem.ccl.all_reduce(iris_output_tensor, iris_input_tensor, config=config, workspace=workspace)
     torch.cuda.synchronize()
@@ -140,13 +138,11 @@ def test_all_reduce_two_shot_distribution(distribution, dtype=torch.float32, M=1
 
     shmem.barrier()
     config = Config(all_reduce_variant="two_shot", all_reduce_distribution=distribution)
-    
+
     # Explicitly call preamble to ensure proper initialization and synchronization
-    workspace = shmem.ccl.all_reduce_preamble(
-        iris_output_tensor, iris_input_tensor, config=config
-    )
+    workspace = shmem.ccl.all_reduce_preamble(iris_output_tensor, iris_input_tensor, config=config)
     shmem.barrier()  # Ensure all ranks have completed preamble before starting kernel
-    
+
     # Now call all_reduce with the prepared workspace
     shmem.ccl.all_reduce(iris_output_tensor, iris_input_tensor, config=config, workspace=workspace)
     torch.cuda.synchronize()

From b49a3e894d80881be79148e7b663034fe2eecfa1 Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sat, 22 Nov 2025 20:15:01 +0000
Subject: [PATCH 4/5] Disable ring-reduce.

---
 tests/ccl/test_all_reduce.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ccl/test_all_reduce.py b/tests/ccl/test_all_reduce.py
index 16501c22..32cc4774 100644
--- a/tests/ccl/test_all_reduce.py
+++ b/tests/ccl/test_all_reduce.py
@@ -16,7 +16,7 @@
     "variant",
     [
         "atomic",
-        "ring",
+        # "ring",
         "two_shot",
         "one_shot",
         "spinlock",

From 9ddd9ebd6c69a27fc7adfcaa2fcbefe287dd2b12 Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Wed, 3 Dec 2025 20:29:16 +0000
Subject: [PATCH 5/5] Enable installation methods.

---
 .github/scripts/run_tests.sh     |  35 +++++++-
 .github/workflows/iris-tests.yml | 150 +++++++++++++++++++++++++++++--
 2 files changed, 174 insertions(+), 11 deletions(-)

diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh
index 160de790..83c84512 100755
--- a/.github/scripts/run_tests.sh
+++ b/.github/scripts/run_tests.sh
@@ -3,22 +3,28 @@
 # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Run Iris tests in a container
-# Usage: run_tests.sh <test_dir> <num_ranks> [gpu_devices]
+# Usage: run_tests.sh <test_dir> <num_ranks> [gpu_devices] [install_method]
 #   test_dir: subdirectory under tests/ (e.g., examples, unittests, ccl)
 #   num_ranks: number of GPU ranks (1, 2, 4, or 8)
 #   gpu_devices: comma-separated GPU device IDs (optional)
+#   install_method: pip install method - "git", "editable", or "install" (optional, default: "editable")
+#     - "git": pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
+#     - "editable": pip install -e .
+#     - "install": pip install .
 
 set -e
 
 TEST_DIR=$1
 NUM_RANKS=$2
 GPU_DEVICES=${3:-""}
+INSTALL_METHOD=${4:-"editable"}
 
 if [ -z "$TEST_DIR" ] || [ -z "$NUM_RANKS" ]; then
     echo "[ERROR] Missing required arguments"
-    echo "Usage: $0 <test_dir> <num_ranks> [gpu_devices]"
+    echo "Usage: $0 <test_dir> <num_ranks> [gpu_devices] [install_method]"
     echo "  test_dir: examples, unittests, or ccl"
     echo "  num_ranks: 1, 2, 4, or 8"
+    echo "  install_method: git, editable, or install (default: editable)"
     exit 1
 fi
 
@@ -28,6 +34,13 @@ if [ ! -d "tests/$TEST_DIR" ]; then
     exit 1
 fi
 
+# Validate install method
+if [ "$INSTALL_METHOD" != "git" ] && [ "$INSTALL_METHOD" != "editable" ] && [ "$INSTALL_METHOD" != "install" ]; then
+    echo "[ERROR] Invalid install_method: $INSTALL_METHOD"
+    echo "  Must be one of: git, editable, install"
+    exit 1
+fi
+
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
 # Build GPU argument if provided
@@ -36,15 +49,29 @@ if [ -n "$GPU_DEVICES" ]; then
     GPU_ARG="--gpus $GPU_DEVICES"
 fi
 
+# Build install command based on method
+INSTALL_CMD=""
+if [ "$INSTALL_METHOD" = "git" ]; then
+    # For git install, we need the repository and SHA from environment or use defaults
+    REPO=${GITHUB_REPOSITORY:-"ROCm/iris"}
+    SHA=${GITHUB_SHA:-"HEAD"}
+    INSTALL_CMD="pip install git+https://github.com/${REPO}.git@${SHA}"
+elif [ "$INSTALL_METHOD" = "editable" ]; then
+    INSTALL_CMD="pip install -e ."
+elif [ "$INSTALL_METHOD" = "install" ]; then
+    INSTALL_CMD="pip install ."
+fi
+
 # Run tests in container
 "$SCRIPT_DIR/container_exec.sh" $GPU_ARG "
     set -e
-    pip install -e .
+    echo \"Installing iris using method: $INSTALL_METHOD\"
+    $INSTALL_CMD
     
     # Run tests in the specified directory
     for test_file in tests/$TEST_DIR/test_*.py; do
         if [ -f \"\$test_file\" ]; then
-            echo \"Testing: \$test_file with $NUM_RANKS ranks\"
+            echo \"Testing: \$test_file with $NUM_RANKS ranks (install: $INSTALL_METHOD)\"
             python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
         fi
     done
diff --git a/.github/workflows/iris-tests.yml b/.github/workflows/iris-tests.yml
index 1fec194b..0e4b4da3 100644
--- a/.github/workflows/iris-tests.yml
+++ b/.github/workflows/iris-tests.yml
@@ -37,15 +37,15 @@ jobs:
         run: |
           bash .github/scripts/container_build.sh
 
-  test:
-    name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks)
+  test-git:
+    name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks, git install)
     needs: build-container-image
     runs-on: [self-hosted, mi3008x]
     strategy:
       fail-fast: false
       matrix:
         include:
-          # Test each subdirectory with each rank count
+          # Test each subdirectory with each rank count using git install
           - test_dir: examples
             num_ranks: 1
             gpu_devices: "0,1"
@@ -91,14 +91,150 @@ jobs:
         run: |
           bash .github/scripts/cleanup_ports.sh
 
-      - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks
+      - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (git install)
+        env:
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          GITHUB_SHA: ${{ github.sha }}
         run: |
           set -e
-          echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks"
+          echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install: git)"
           bash .github/scripts/run_tests.sh \
             "${{ matrix.test_dir }}" \
             "${{ matrix.num_ranks }}" \
-            "${{ matrix.gpu_devices }}"
+            "${{ matrix.gpu_devices }}" \
+            "git"
           echo "::endgroup::"
-          echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks passed!"
+          echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (git) passed!"
+
+  test-editable:
+    name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks, editable install)
+    needs: [build-container-image, test-git]
+    runs-on: [self-hosted, mi3008x]
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Test each subdirectory with each rank count using editable install
+          - test_dir: examples
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: examples
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: examples
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: examples
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+          - test_dir: unittests
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: unittests
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: unittests
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: unittests
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+          - test_dir: ccl
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: ccl
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: ccl
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: ccl
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Cleanup lingering ports before tests
+        run: |
+          bash .github/scripts/cleanup_ports.sh
+
+      - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (editable install)
+        run: |
+          set -e
+          echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install: editable)"
+          bash .github/scripts/run_tests.sh \
+            "${{ matrix.test_dir }}" \
+            "${{ matrix.num_ranks }}" \
+            "${{ matrix.gpu_devices }}" \
+            "editable"
+          echo "::endgroup::"
+          echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (editable) passed!"
+
+  test-install:
+    name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks, pip install)
+    needs: [build-container-image, test-editable]
+    runs-on: [self-hosted, mi3008x]
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Test each subdirectory with each rank count using pip install
+          - test_dir: examples
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: examples
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: examples
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: examples
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+          - test_dir: unittests
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: unittests
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: unittests
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: unittests
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+          - test_dir: ccl
+            num_ranks: 1
+            gpu_devices: "0,1"
+          - test_dir: ccl
+            num_ranks: 2
+            gpu_devices: "2,3"
+          - test_dir: ccl
+            num_ranks: 4
+            gpu_devices: "4,5,6,7"
+          - test_dir: ccl
+            num_ranks: 8
+            gpu_devices: "0,1,2,3,4,5,6,7"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Cleanup lingering ports before tests
+        run: |
+          bash .github/scripts/cleanup_ports.sh
+
+      - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (pip install)
+        run: |
+          set -e
+          echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install: install)"
+          bash .github/scripts/run_tests.sh \
+            "${{ matrix.test_dir }}" \
+            "${{ matrix.num_ranks }}" \
+            "${{ matrix.gpu_devices }}" \
+            "install"
+          echo "::endgroup::"
+          echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install) passed!"