Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/scripts/container_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then

elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
echo "[INFO] Checking Docker images..."
IMAGE_NAME="iris-dev-triton-aafec41"
# Use GitHub variable if set, otherwise default to iris-dev
IMAGE_NAME=${DOCKER_IMAGE_NAME:-"iris-dev"}

# Check if the triton image exists
# Check if the image exists
if docker image inspect "$IMAGE_NAME" &> /dev/null; then
echo "[INFO] Using existing Docker image: $IMAGE_NAME"
else
Expand Down
4 changes: 3 additions & 1 deletion .github/scripts/container_exec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
exit $EXIT_CODE

elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
IMAGE_NAME=${CUSTOM_IMAGE:-${DOCKER_IMAGE_NAME:-"iris-dev-triton-aafec41"}}
# Use CUSTOM_IMAGE if provided; otherwise fall back to DOCKER_IMAGE_NAME
# (set by GitHub Actions), and finally to iris-dev for local runs.
IMAGE_NAME=${CUSTOM_IMAGE:-${DOCKER_IMAGE_NAME:-"iris-dev"}}

if ! docker image inspect "$IMAGE_NAME" &> /dev/null; then
echo "[ERROR] Docker image $IMAGE_NAME not found" >&2
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/container_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ if [ "$CONTAINER_RUNTIME" = "apptainer" ]; then
bash apptainer/run.sh "$@"
elif [ "$CONTAINER_RUNTIME" = "docker" ]; then
echo "[INFO] Running with Docker..."
IMAGE_NAME=${1:-"iris-dev-triton-aafec41"}
# Use the first argument if given, then the DOCKER_IMAGE_NAME GitHub variable, otherwise default to iris-dev
IMAGE_NAME=${1:-${DOCKER_IMAGE_NAME:-"iris-dev"}}
WORKSPACE_DIR=${2:-"$(pwd)"}
bash docker/run.sh "$IMAGE_NAME" "$WORKSPACE_DIR"
fi
Expand Down
76 changes: 52 additions & 24 deletions .github/scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,41 @@
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Run Iris tests in a container
# Usage: run_tests.sh <num_ranks> [gpu_devices]
# Usage: run_tests.sh <test_dir> <num_ranks> [gpu_devices] [install_method]
# test_dir: subdirectory under tests/ (e.g., examples, unittests, ccl)
# num_ranks: number of GPU ranks (1, 2, 4, or 8)
# gpu_devices: comma-separated GPU device IDs (optional)
# install_method: pip install method - "git", "editable", or "install" (optional, default: "editable")
# - "git": pip install git+https://github.com/${GITHUB_REPOSITORY}.git@${GITHUB_SHA} (defaults: ROCm/iris and HEAD when unset)
# - "editable": pip install -e .
# - "install": pip install .

set -e

NUM_RANKS=$1
GPU_DEVICES=${2:-""}
TEST_DIR=$1
NUM_RANKS=$2
GPU_DEVICES=${3:-""}
INSTALL_METHOD=${4:-"editable"}

if [ -z "$NUM_RANKS" ]; then
echo "[ERROR] NUM_RANKS not provided"
echo "Usage: $0 <num_ranks> [gpu_devices]"
if [ -z "$TEST_DIR" ] || [ -z "$NUM_RANKS" ]; then
echo "[ERROR] Missing required arguments"
echo "Usage: $0 <test_dir> <num_ranks> [gpu_devices] [install_method]"
echo " test_dir: examples, unittests, or ccl"
echo " num_ranks: 1, 2, 4, or 8"
echo " install_method: git, editable, or install (default: editable)"
exit 1
fi

# Validate test directory
if [ ! -d "tests/$TEST_DIR" ]; then
echo "[ERROR] Test directory tests/$TEST_DIR does not exist"
exit 1
fi

# Validate install method
if [ "$INSTALL_METHOD" != "git" ] && [ "$INSTALL_METHOD" != "editable" ] && [ "$INSTALL_METHOD" != "install" ]; then
echo "[ERROR] Invalid install_method: $INSTALL_METHOD"
echo " Must be one of: git, editable, install"
exit 1
fi

Expand All @@ -24,28 +49,31 @@ if [ -n "$GPU_DEVICES" ]; then
GPU_ARG="--gpus $GPU_DEVICES"
fi

# Build install command based on method
INSTALL_CMD=""
if [ "$INSTALL_METHOD" = "git" ]; then
# For git install, we need the repository and SHA from environment or use defaults
REPO=${GITHUB_REPOSITORY:-"ROCm/iris"}
SHA=${GITHUB_SHA:-"HEAD"}
INSTALL_CMD="pip install git+https://github.com/${REPO}.git@${SHA}"
elif [ "$INSTALL_METHOD" = "editable" ]; then
INSTALL_CMD="pip install -e ."
elif [ "$INSTALL_METHOD" = "install" ]; then
INSTALL_CMD="pip install ."
fi

# Run tests in container
"$SCRIPT_DIR/container_exec.sh" $GPU_ARG "
set -e
pip install -e .
echo \"Installing iris using method: $INSTALL_METHOD\"
$INSTALL_CMD

# Run examples tests
for test_file in tests/examples/test_*.py; do
echo \"Testing: \$test_file with $NUM_RANKS ranks\"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
# Run tests in the specified directory
for test_file in tests/$TEST_DIR/test_*.py; do
if [ -f \"\$test_file\" ]; then
echo \"Testing: \$test_file with $NUM_RANKS ranks (install: $INSTALL_METHOD)\"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
fi
done

# Run unit tests
for test_file in tests/unittests/test_*.py; do
echo \"Testing: \$test_file with $NUM_RANKS ranks\"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
done

# Run ccl tests
# DISABLED: CCL host-side APIs have issues for some data types/algorithms
# for test_file in tests/ccl/test_*.py; do
# echo \"Testing: \$test_file with $NUM_RANKS ranks\"
# python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
# done
"

3 changes: 3 additions & 0 deletions .github/workflows/iris-external-validation-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}

jobs:
build-container-image:
runs-on: [self-hosted, mi3008x]
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/iris-performance-regression-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}

jobs:
build-container-image:
runs-on: [self-hosted, mi3008x]
Expand Down
198 changes: 0 additions & 198 deletions .github/workflows/iris-pip-install-test.yml

This file was deleted.

Loading
Loading