
Commit c72f74c

Merge branch 'main' into upgrade/ansible-core-2.16

2 parents: 755a1af + ee9e5a9

15 files changed: +95 −44 lines

.github/workflows/trivyscan.yml

Lines changed: 2 additions & 2 deletions

@@ -102,7 +102,7 @@ jobs:
       run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}'

     - name: Run Trivy vulnerability scanner
-      uses: aquasecurity/trivy-action@v0.33.1
+      uses: aquasecurity/trivy-action@0.33.1
       with:
         scan-type: fs
         scan-ref: "${{ steps.manifest.outputs.image-name }}"

@@ -122,7 +122,7 @@ jobs:
         category: "${{ matrix.build }}"

     - name: Fail if scan has CRITICAL vulnerabilities
-      uses: aquasecurity/trivy-action@v0.33.1
+      uses: aquasecurity/trivy-action@0.33.1
       with:
         scan-type: fs
         scan-ref: "${{ steps.manifest.outputs.image-name }}"
Lines changed: 38 additions & 0 deletions

@@ -0,0 +1,38 @@
+name: Workflow Cleanup
+on:
+  workflow_dispatch:
+    # checkov:skip=CKV_GHA_7: "The build output cannot be affected by user parameters other than the build entry point and the top-level source location. GitHub Actions workflow_dispatch inputs MUST be empty. "
+    inputs:
+      ci_cloud:
+        description: 'Select the CI_CLOUD'
+        required: true
+        type: choice
+        options:
+          - LEAFCLOUD
+          - SMS
+          - ARCUS
+      cluster_name:
+        description: 'Cluster to delete'
+        type: string
+        required: true
+
+
+permissions:
+  contents: read
+  packages: write
+  # To report GitHub Actions status checks
+  statuses: write
+
+jobs:
+  dummy:
+    name: dummy-job1
+    runs-on: ubuntu-22.04
+    env:
+      CI_CLOUD: ${{ github.event.inputs.ci_cloud }}
+      CLUSTER_NAME: ${{ github.event.inputs.cluster_name }}
+
+    steps:
+      - name: print input vars
+        run: |
+          echo CI_CLOUD: ${{ env.CI_CLOUD }}
+          echo CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
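As a general GitHub Actions note (not part of this commit), `workflow_dispatch` inputs are also available via the `inputs` context, so the job's env mapping could equivalently be written as:

```yaml
# Equivalent env mapping using the inputs context (sketch, not in this commit)
env:
  CI_CLOUD: ${{ inputs.ci_cloud }}
  CLUSTER_NAME: ${{ inputs.cluster_name }}
```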

ansible/roles/compute_init/files/compute-init.yml

Lines changed: 5 additions & 14 deletions

@@ -277,22 +277,13 @@
         name: basic_users
       when: enable_basic_users

-    - name: EESSI
-      when: enable_eessi
       # NB: don't need conditional block on enable_compute as have already exited
       # if not the case
-      block:
-        - name: Copy cvmfs config
-          ansible.builtin.copy:
-            src: /var/tmp/cluster/cvmfs/default.local
-            dest: /etc/cvmfs/default.local
-            owner: root
-            group: root
-            mode: "0644"
-
-        - name: Ensure CVMFS config is setup # noqa: no-changed-when
-          ansible.builtin.command:
-            cmd: "cvmfs_config setup"
+    - name: Configure EESSI
+      ansible.builtin.include_role:
+        name: eessi
+        tasks_from: configure.yml
+      when: enable_eessi

     - name: Configure VGPUs
       ansible.builtin.include_role:
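The EESSI change above relies on Ansible's `include_role` with `tasks_from`, which runs a specific task file from a role rather than its `tasks/main.yml`; this lets compute-init reuse the `eessi` role's configuration tasks at boot instead of duplicating the copy/command tasks inline. A minimal sketch, assuming the role layout noted in the comments:

```yaml
# Assumed role layout (sketch; only configure.yml is confirmed by this commit):
#   ansible/roles/eessi/tasks/main.yml       - full install + configure
#   ansible/roles/eessi/tasks/configure.yml  - configuration-only tasks
- name: Configure EESSI
  ansible.builtin.include_role:
    name: eessi
    tasks_from: configure.yml
  when: enable_eessi
```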

ansible/roles/compute_init/tasks/export.yml

Lines changed: 0 additions & 11 deletions

@@ -62,17 +62,6 @@
   run_once: true
   delegate_to: "{{ groups['control'] | first }}"

-- name: Copy EESSI CVMFS config to /exports/cluster
-  ansible.builtin.copy:
-    src: /etc/cvmfs/default.local
-    dest: /exports/cluster/cvmfs/default.local
-    owner: slurm
-    group: root
-    mode: "0644"
-    remote_src: true
-  run_once: true
-  delegate_to: "{{ groups['control'] | first }}"
-
 - name: Export cacerts
   ansible.builtin.include_role:
     name: cacerts

ansible/roles/compute_init/tasks/install.yml

Lines changed: 2 additions & 0 deletions

@@ -54,6 +54,8 @@
       dest: roles/
     - src: ../../nhc
       dest: roles/
+    - src: ../../eessi
+      dest: roles/

 - name: Add filter_plugins to ansible.cfg
   ansible.builtin.lineinfile:

ansible/roles/cuda/defaults/main.yml

Lines changed: 2 additions & 2 deletions

@@ -2,9 +2,9 @@
 # yamllint disable-line rule:line-length
 cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}/cuda-rhel{{ ansible_distribution_major_version }}.repo"
 cuda_nvidia_driver_stream: '580-open'
-cuda_nvidia_driver_version: '580.82.07-1'
+cuda_nvidia_driver_version: '580.105.08-1'
 cuda_nvidia_driver_pkg: "nvidia-open-3:{{ cuda_nvidia_driver_version }}.el{{ ansible_distribution_major_version }}"
-cuda_package_version: '13.0.1-1'
+cuda_package_version: '13.0.2-1'
 cuda_version_short: "{{ (cuda_package_version | split('.'))[0:2] | join('.') }}" # major.minor
 cuda_packages_default:
   - "cuda-toolkit-{{ cuda_package_version }}"

ansible/roles/eessi/tasks/configure.yml

Lines changed: 17 additions & 0 deletions

@@ -15,3 +15,20 @@
 - name: Ensure CVMFS config is setup # noqa: no-changed-when
   ansible.builtin.command:
     cmd: "cvmfs_config setup"
+
+# configure gpus
+- name: Check for NVIDIA GPU
+  ansible.builtin.stat:
+    path: /dev/nvidia0
+  register: nvidia_driver
+
+- name: Set fact if NVIDIA GPU is present
+  ansible.builtin.set_fact:
+    has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}"
+
+- name: Expose GPU drivers
+  ansible.builtin.shell: |
+    source /cvmfs/software.eessi.io/versions/2023.06/init/bash
+    /cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
+  when: has_nvidia_driver
+  changed_when: true
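After these tasks run, software from the EESSI stack should be usable once the environment is initialised; a minimal verification sketch as an Ansible task (the init path comes from the tasks above, everything else is an assumption):

```yaml
# Sketch: confirm the EESSI stack is reachable after CVMFS/GPU configuration
- name: Check EESSI software stack is available  # hypothetical verification task
  ansible.builtin.shell: |
    source /cvmfs/software.eessi.io/versions/2023.06/init/bash
    module avail
  changed_when: false
```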

ansible/roles/openondemand/README.md

Lines changed: 2 additions & 2 deletions

@@ -69,10 +69,10 @@ This role enables SSL on the Open Ondemand server, using the following self-sign
 - `new_window`: Optional. Whether to open link in new window. Bool, default `false`.
 - `app_name`: Optional. Unique name for app appended to `/var/www/ood/apps/sys/`. Default is `name`, useful if that is not unique or not suitable as a path component.
 - `openondemand_dashboard_support_url`: Optional. URL or email etc to show as support contact under Help in dashboard. Default `(undefined)`.
-- `openondemand_desktop_partition`: Optional. Name of Slurm partition to use for remote desktops. Requires a corresponding group named "openondemand_desktop" and entry in openhpc_partitions.
+- `openondemand_desktop_partition`: Optional. Name of Slurm partition to use for remote desktops, by default supplied with `openhpc_partitions` entry. During open ondemand config the string is used to provide a default partition in the UX. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image.
 - `openondemand_desktop_screensaver`: Optional. Whether to enable screen locking/screensaver. **NB:** Users must have passwords if this is enabled. Bool, default `false`.
 - `openondemand_filesapp_paths`: List of paths (in addition to $HOME, which is always added) to include shortcuts to within the Files dashboard app.
-- `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers. Requires a corresponding group named "openondemand_jupyter" and entry in openhpc_partitions.
+- `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers, by default supplied with `openhpc_partitions` entry. During open ondemand config the string is used to provide a default partition in the UX. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image.
 - `openondemand_gres_options`: Optional. A list of `[label, value]` items used
   to provide a drop-down for resource/GRES selection in application forms. The
   default constructs a list from all GRES definitions in the cluster. See the
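For illustration, at deploy time these two variables are plain partition-name strings, e.g. in environment group vars (the partition names below are hypothetical, not from this commit):

```yaml
# Hypothetical runtime values: Slurm partition names as strings
openondemand_desktop_partition: "desktop"
openondemand_jupyter_partition: "jupyter"
```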

docs/experimental/slurm-controlled-rebuild.md

Lines changed: 1 addition & 1 deletion

@@ -117,7 +117,7 @@ compute = {

 - `name`: Partition name matching `rebuild` role variable `rebuild_partitions`,
   default `rebuild`.
-- `groups`: A list of nodegroup names, matching `openhpc_nodegroup` and
+- `nodegroups`: A list of nodegroup names, matching `openhpc_nodegroup` and
   keys in the OpenTofu `compute` variable (see example in step 2 above).
   Normally every compute node group should be listed here, unless
   Slurm-controlled rebuild is not required for certain node groups.

docs/openondemand.md

Lines changed: 8 additions & 0 deletions

@@ -63,6 +63,14 @@ The appliance automatically configures Open OnDemand to proxy Grafana and adds a

 [^1]: Note that if `openondemand_auth` is `basic_pam` and anonymous Grafana login is enabled, the appliance will (by default) configure Open OnDemand's Apache server to remove the Authorisation header from proxying of all `node/` addresses. This is done as otherwise Grafana tries to use this header to authenticate, which fails with the default configuration where only the admin Grafana user `grafana` is created. Note that the removal of this header in this configuration means it cannot be used to authenticate proxied interactive applications - however the appliance-deployed remote desktop and Jupyter Notebook server applications use other authentication methods. An alternative if using `basic_pam` is not to enable anonymous Grafana login and to create Grafana users matching the local users (e.g. in `environments/<env>/hooks/post.yml`).

+## Image Build
+
+By default, most ondemand apps are installed in image builds when the build includes the inventory group `openondemand` (which is the default for "fatimage" builds). The apps installed are
+defined by the `openondemand_<app>_partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. Note that in this case the values are not strings and are instead
+simply truthy, i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not.
+
+For e.g. site-specific image builds where different app installs are required, due to precedence rules these must be overridden in a `builder`-groupvars file e.g. `environments/site/inventory/group_vars/all/builder/defaults.yml`.
+
 ## Access

 By default the appliance authenticates against OOD with basic auth through PAM. When creating a new environment, a new user with username `demo_user` will be created.
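A minimal sketch of the site override described in the new "Image Build" section above (the specific app selections are illustrative assumptions):

```yaml
# environments/site/inventory/group_vars/all/builder/defaults.yml (sketch)
# During image build these values are only tested for truthiness;
# they are not used as partition names.
openondemand_desktop_partition: true    # install the remote desktop app in the image
openondemand_jupyter_partition: false   # do not install the Jupyter app
```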
