From 1af04855cc3019f88f5692388763d5c16eab4043 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 12:34:13 +0100 Subject: [PATCH 01/25] And Action --- .github/workflows/ci.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..87543f3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential openmpi-bin libopenmpi-dev + - name: Build first example + working-directory: 03-H_Multi_GPU_Parallelization/solutions + env: + MPI_HOME: /usr + run: | + make jacobi From 1bf8ae4d69dbac5b81c9d830b98b3e04744a38db Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 13:27:08 +0100 Subject: [PATCH 02/25] Add CUDA container image --- .github/workflows/ci.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87543f3..fe6d382 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,20 +1,19 @@ name: CI - on: push: branches: [ main ] pull_request: branches: [ main ] - jobs: build: runs-on: ubuntu-latest + container: + image: nvidia/cuda:13.2.0-devel-ubuntu24.04 steps: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt-get update - sudo apt-get install -y build-essential openmpi-bin libopenmpi-dev + apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev - name: Build first example working-directory: 03-H_Multi_GPU_Parallelization/solutions env: From b759d09cc45b7fb278838a6af3bfbe61c12632a4 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 13:32:52 +0100 Subject: [PATCH 03/25] fix OpenMPI path --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe6d382..4bc44d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,6 @@ jobs: - name: Build first example working-directory: 03-H_Multi_GPU_Parallelization/solutions env: - MPI_HOME: /usr - run: | + MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi + run: | make jacobi From 6c99fbe8814911692fa04419483fe928b0d03022 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 13:43:15 +0100 Subject: [PATCH 04/25] Fix vim auto-linting --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bc44d1..803e40a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,5 +18,5 @@ jobs: working-directory: 03-H_Multi_GPU_Parallelization/solutions env: MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi - run: | + run: | make jacobi From 1fe77c655901d5ac4a7cb9b797c6952134f5ea38 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 14:41:10 +0100 Subject: [PATCH 05/25] Expand coverage --- .github/workflows/ci.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 803e40a..8499b70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,15 @@ on: branches: [ main ] jobs: build: + strategy: + matrix: + exercise: + - 03-H_Multi_GPU_Parallelization/solutions + - 06-H_Overlap_Communication_and_Computation_MPI/solutions + - 08-H_NCCL_NVSHMEM/solutions/NCCL + - 08-H_NCCL_NVSHMEM/solutions/NVSHMEM + - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/ + - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/ runs-on: ubuntu-latest container: image: nvidia/cuda:13.2.0-devel-ubuntu24.04 @@ -15,7 +24,7 @@ jobs: run: | apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev - name: Build first example - working-directory: 03-H_Multi_GPU_Parallelization/solutions + working-directory: ${{ matrix.exercise }} env: MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi run: | From 6bf0735139ea6a5551437c4eaf241ed69f7dde08 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 14:47:43 +0100 Subject: [PATCH 06/25] Small correction --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8499b70..aa6374e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,8 +13,8 @@ jobs: - 06-H_Overlap_Communication_and_Computation_MPI/solutions - 08-H_NCCL_NVSHMEM/solutions/NCCL - 08-H_NCCL_NVSHMEM/solutions/NVSHMEM - - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/ - - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/ + - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM + - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs runs-on: ubuntu-latest container: image: nvidia/cuda:13.2.0-devel-ubuntu24.04 From ebead816b389e090639b4cf1718b63d11057dbec Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Fri, 27 Mar 2026 14:55:00 +0100 Subject: [PATCH 07/25] Fail slow --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa6374e..84b3a00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: jobs: build: strategy: + fail-fast: false matrix: exercise: - 03-H_Multi_GPU_Parallelization/solutions From 21b091e655788c9d4520ce6c14a942729b276712 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 14:04:17 +0200 Subject: [PATCH 08/25] Update container to NVHPC to get something with NCCL, NVSHMEM --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 84b3a00..7102dc1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs runs-on: ubuntu-latest container: - image: nvidia/cuda:13.2.0-devel-ubuntu24.04 + image: nvcr.io/nvidia/nvhpc:26.3-devel-cuda13.1-ubuntu24.04 steps: - uses: actions/checkout@v3 - name: Install dependencies From 7847b793b81ac97e2b0563300b77baaab67fddc0 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 14:37:25 +0200 Subject: [PATCH 09/25] Set env vars which are required --- .github/workflows/ci.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7102dc1..8779bc4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,14 +19,18 @@ jobs: runs-on: ubuntu-latest container: image: nvcr.io/nvidia/nvhpc:26.3-devel-cuda13.1-ubuntu24.04 + env: + NVHPC_ROOT: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3 + CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 + MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi + NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/math_libs/nccl + NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem + PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include + LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64 steps: - uses: actions/checkout@v3 - - name: Install dependencies - run: | - apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev - - name: Build first example + - name: Test Building working-directory: ${{ matrix.exercise }} - env: - MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi run: | make jacobi From 804b2310a8c1716f94efd6a35c9f2214cfe1d6d4 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 14:39:22 +0200 Subject: [PATCH 10/25] change MPI location --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8779bc4..920d41f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,8 @@ jobs: env: NVHPC_ROOT: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3 CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 - MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi + MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ + # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/math_libs/nccl NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin From 4aff5c612073a60c9437e1a29a5844038b12a95a Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 15:06:33 +0200 Subject: [PATCH 11/25] Maybe fix PATH? --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 920d41f..386d4fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/math_libs/nccl NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem - PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64 steps: From 41e4a090ba0b8a1fbc50ef0497aecca39f7c968d Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 15:10:10 +0200 Subject: [PATCH 12/25] Fix NCCL --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 386d4fa..cc94821 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi - NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/math_libs/nccl + NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/nccl NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include From f01a6da6c3d1ea08e1605d376d258146a5471436 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 15:46:32 +0200 Subject: [PATCH 13/25] Add LD_LIBRARY_PATH --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cc94821..0ab8f55 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,11 +24,11 @@ jobs: CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi - NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/nccl + NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/ NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include - LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64 + LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ steps: - uses: actions/checkout@v3 - name: Test Building From 587526e5107cc06f3426da48f3038f8223390784 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 15:52:27 +0200 Subject: [PATCH 14/25] debug --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ab8f55..28f2af6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,4 +34,5 @@ jobs: - name: Test Building working-directory: ${{ matrix.exercise }} run: | + printenv LD_LIBRARY_PATH make jacobi From 7f6c35d44cfdf8f3814756b7232196a1ffe709e1 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 16:00:03 +0200 Subject: [PATCH 15/25] debug --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28f2af6..807388a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,10 +29,11 @@ jobs: # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ + LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH }} steps: - uses: actions/checkout@v3 - name: Test Building working-directory: ${{ matrix.exercise }} run: | - printenv LD_LIBRARY_PATH + printenv LIBRARY_PATH make jacobi From 7cdb09f3c249067067c4ba12389b344d1a14b19c Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 16:03:10 +0200 Subject: [PATCH 16/25] debug --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 807388a..0b1c437 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ - LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH }} + LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ steps: - uses: actions/checkout@v3 - name: Test Building From fd157158c9d94759156872b1c13b43b8a1ec2eb8 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 16:24:48 +0200 Subject: [PATCH 17/25] debug --- .github/workflows/ci.yml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b1c437..d931ee7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,20 +20,21 @@ jobs: container: image: nvcr.io/nvidia/nvhpc:26.3-devel-cuda13.1-ubuntu24.04 env: - NVHPC_ROOT: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3 + # NVHPC_ROOT: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3 CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 - MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ - # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi - NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/ - NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem - # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} - CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include - LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ - LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ + # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ + # # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi + # NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/ + # NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem + # # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} + # CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include + # LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ + # LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ steps: - uses: actions/checkout@v3 - name: Test Building working-directory: ${{ matrix.exercise }} run: | - printenv LIBRARY_PATH + module load nvhpc-hpcx + printenv NVHPC_ROOT make jacobi From bd97de3e565eae64af8a824a2fc41a61aa7ef067 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 16:58:39 +0200 Subject: [PATCH 18/25] debug --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d931ee7..ffc75bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,6 +35,10 @@ jobs: - name: Test Building working-directory: ${{ matrix.exercise }} run: | + source /usr/local/bin/entrypoint.sh + type module + source source /usr/share/lmod/lmod/init/bash + type module module load nvhpc-hpcx printenv NVHPC_ROOT make jacobi From f50ac81da831d26e6dcbfa77b8c2a06d65e98adf Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:09:24 +0200 Subject: [PATCH 19/25] debug --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ffc75bb..921c717 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,9 @@ on: branches: [ main ] jobs: build: + defaults: + run: + shell: bash strategy: fail-fast: false matrix: From c6db7f1adcf2adce80c4355675beafab970db0fa Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:13:24 +0200 Subject: [PATCH 20/25] debug --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 921c717..b3a6695 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,10 +38,6 @@ jobs: - name: Test Building working-directory: ${{ matrix.exercise }} run: | - source /usr/local/bin/entrypoint.sh - type module - source source /usr/share/lmod/lmod/init/bash - type module module load nvhpc-hpcx printenv NVHPC_ROOT make jacobi From cb5ccbe51a5ed67532a1dded47561623f6c13a8b Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:16:23 +0200 Subject: [PATCH 21/25] debug --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3a6695..81848f0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,6 +38,8 @@ jobs: - name: Test Building working-directory: ${{ matrix.exercise }} run: | + source /usr/share/lmod/lmod/init/bash + type module module load nvhpc-hpcx printenv NVHPC_ROOT make jacobi From 24853a693e114d8648aadcb99614146060e2d3e7 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:28:10 +0200 Subject: [PATCH 22/25] debug --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81848f0..c9b206c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: env: # NVHPC_ROOT: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3 CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 - # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ + MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ # # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi # NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/ # NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem From c3e1d65b25e1934a07c853f16bda107a36430884 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:32:08 +0200 Subject: [PATCH 23/25] debug --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c9b206c..4e762f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: # # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} # CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include # LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ - # LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ + LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ steps: - uses: actions/checkout@v3 - name: Test Building From 6380dd320ab444a84660b166fe0b873808f59412 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:35:08 +0200 Subject: [PATCH 24/25] debug --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4e762f8..ac09eed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,8 +27,8 @@ jobs: CUDA_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1 MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/13.1/hpcx/latest/ompi/ # # MPI_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/mpi - # NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/ - # NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem + NCCL_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/ + NVSHMEM_HOME: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem # # PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/compilers/bin:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/bin:${{ github.workspace }}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} # CPATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/include # LD_LIBRARY_PATH: /opt/nvidia/hpc_sdk/Linux_x86_64/26.3/cuda/13.1/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nvshmem/lib/:/opt/nvidia/hpc_sdk/Linux_x86_64/26.3/comm_libs/nccl/lib/ From 6a6217af4f566e2a793a94fcc40ba552a499b0b5 Mon Sep 17 00:00:00 2001 From: Andreas Herten Date: Wed, 6 May 2026 17:39:09 +0200 Subject: [PATCH 25/25] Sanitize a little bit --- .github/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac09eed..b4b2bf2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,5 @@ jobs: working-directory: ${{ matrix.exercise }} run: | source /usr/share/lmod/lmod/init/bash - type module module load nvhpc-hpcx - printenv NVHPC_ROOT make jacobi