10 changes: 2 additions & 8 deletions .github/workflows/test-wheel-linux.yml
@@ -57,12 +57,6 @@ jobs:
# Read base matrix from YAML file for the specific architecture
TEST_MATRIX=$(yq -o json ".linux[\"${MATRIX_TYPE}\"] | map(select(.ARCH == \"${ARCH}\"))" ci/test-matrix.yml)

# Add special runner for amd64 if applicable
if [[ "${ARCH}" == "amd64" ]]; then
SPECIAL_RUNNERS=$(yq -o json '.linux.special_runners.amd64' ci/test-matrix.yml)
TEST_MATRIX=$(echo "$TEST_MATRIX" | yq -o json ". + $SPECIAL_RUNNERS")
fi

# Apply matrix filter and wrap in include structure
MATRIX=$(echo "$TEST_MATRIX" | jq -c '${{ inputs.matrix_filter }} | if (. | length) > 0 then {include: .} else "Error: Empty matrix\n" | halt_error(1) end')

@@ -73,12 +67,12 @@ jobs:
echo "OLD_BRANCH=${OLD_BRANCH}" >> "$GITHUB_OUTPUT"

test:
name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }}
name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format('(x{0})', matrix.GPU_COUNT) || '' }}
needs: compute-matrix
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
runs-on: "linux-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1"
runs-on: "linux-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-${{ matrix.GPU_COUNT }}"
# The build stage could fail but we want the CI to keep moving.
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
# Our self-hosted runners require a container
2 changes: 1 addition & 1 deletion .github/workflows/test-wheel-windows.yml
@@ -66,7 +66,7 @@ jobs:
fail-fast: false
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
runs-on: "windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1"
runs-on: "windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-${{ matrix.GPU_COUNT }}"
steps:
- name: Checkout ${{ github.event.repository.name }}
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
110 changes: 55 additions & 55 deletions ci/test-matrix.yml
@@ -8,7 +8,7 @@
#
# Please keep the matrices sorted in ascending order by the following:
#
# [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]
# [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, GPU_COUNT, DRIVER]
#
# Windows entries also include DRIVER_MODE.
#
@@ -17,62 +17,62 @@

linux:
pull-request:
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'H100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'H100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' }
nightly: []
special_runners:
amd64:
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'H100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'H100', DRIVER: 'latest' }
Comment on lines -53 to -56 (review comment from a Member):

My only nitpick is that it'd be nice to have a comment or code block that separates out "special runners" (including the 2-GPU ones introduced in this PR) from the regular matrix. It's easier to eyeball and update.
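
A sketch of what that separation could look like in ci/test-matrix.yml, purely as an illustration (the section comments are hypothetical; the entry values are copied from the new matrix above):

    linux:
      pull-request:
        # Regular single-GPU entries
        - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' }
        # ...
        # Special runners (H100 and 2-GPU configurations)
        - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest' }
        - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest' }

Since the workflow now takes GPU_COUNT directly from each matrix entry and the special_runners merge step is gone, such grouping would be a readability aid only; it would not change the JSON matrix that compute-matrix emits.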


windows:
pull-request:
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtx2080', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtx4090', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtx4090', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtx2080', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
nightly: []
10 changes: 6 additions & 4 deletions cuda_core/examples/simple_multi_gpu_example.py
@@ -88,8 +88,9 @@ def __cuda_stream__(self):
# Allocate memory on GPU 0
# Note: This runs on CuPy's current stream for GPU 0
dev0.set_current()
a = cp.random.random(size, dtype=dtype)
b = cp.random.random(size, dtype=dtype)
rng = cp.random.default_rng()
a = rng.random(size, dtype=dtype)
b = rng.random(size, dtype=dtype)
Comment on lines -91 to +93 (review comment from leofang, Member, Jan 16, 2026):

Keep a note on this change for posterity:

  1. We want to encourage end users to use the new NumPy/CuPy RNG interface
  2. But the real reason that we must do it in this PR is: The new RNG does not require libcurand to be installed. CuPy is self-contained -- for all of our use cases we just need NVRTC and driver. Our CI relies on this assumption (to save resources).
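
A minimal illustration of point 1, assuming NumPy and CuPy are installed and a GPU is visible (the variable names and size are hypothetical, not taken from the example):

    import numpy as np
    import cupy as cp

    size, dtype = 1024, np.float32

    host_rng = np.random.default_rng()  # host-side Generator
    dev_rng = cp.random.default_rng()   # device-side Generator with the same interface

    a_host = host_rng.random(size, dtype=dtype)  # NumPy array on the CPU
    a_dev = dev_rng.random(size, dtype=dtype)    # CuPy array on the current device

On point 2: the legacy cp.random.random path goes through the cuRAND host library, while the Generator compiles its kernels at run time, so (as the note above says) only NVRTC and the driver need to be present on the CI runners.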

c = cp.empty_like(a)
cp_stream0 = dev0.create_stream(StreamAdaptor(cp.cuda.get_current_stream()))

@@ -103,8 +104,9 @@ def __cuda_stream__(self):
# Allocate memory on GPU 1
# Note: This runs on CuPy's current stream for GPU 1.
dev1.set_current()
x = cp.random.random(size, dtype=dtype)
y = cp.random.random(size, dtype=dtype)
rng = cp.random.default_rng()
x = rng.random(size, dtype=dtype)
y = rng.random(size, dtype=dtype)
z = cp.empty_like(a)
cp_stream1 = dev1.create_stream(StreamAdaptor(cp.cuda.get_current_stream()))
