diff --git a/.github/workflows/refresh-nvidia.yml b/.github/workflows/refresh-nvidia.yml new file mode 100644 index 0000000..7a47aed --- /dev/null +++ b/.github/workflows/refresh-nvidia.yml @@ -0,0 +1,48 @@ +name: Update NVIDIA driver versions +on: + # NVIDIA Unix driver releases are infrequent so a weekly check is plenty. + schedule: + - cron: "0 14 * * 1" + workflow_dispatch: +permissions: + contents: write + pull-requests: write +jobs: + refresh: + name: Open PR if NVIDIA versions changed + runs-on: ubuntu-latest + steps: + - name: Harden the runner (Audit all outbound calls) + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4 + - name: scrape upstream and rewrite config.yaml + run: python3 ./hack/build/refresh-nvidia-versions.py + - name: open PR if config.yaml changed + # Uses the GitHub API path under the hood so commits are auto-signed + # with the web-flow key (the repo enforces "Verified signatures", which + # blocks plain `git push` from GITHUB_TOKEN). The action is idempotent: + # repeated runs on the same branch update the existing PR. + # + # AUTO_PR_TOKEN, if configured as a PAT in repo secrets, lets the auto-PR + # trigger downstream `pull_request` workflows (the kernel build test). + # Without it we fall back to GITHUB_TOKEN, which can create the PR but + # will not allow CI jobs to autotrigger on PR open - a person still needs + # to kick over/close-reopen the PR to trigger CI. 
+ # TODO bml generate and define a standalone token + uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1 + with: + token: ${{ secrets.AUTO_PR_TOKEN || secrets.GITHUB_TOKEN }} + sign-commits: true + branch: auto/refresh-nvidia + base: main + add-paths: config.yaml + commit-message: "chore: bump NVIDIA driver versions from upstream" + title: "chore: bump NVIDIA driver versions" + body: | + Automated refresh from https://www.nvidia.com/en-us/drivers/unix/. + Review the diff in `config.yaml` and confirm the bumped image tags + build cleanly before merging. + delete-branch: true diff --git a/config.yaml b/config.yaml index 54b493b..20cce70 100644 --- a/config.yaml +++ b/config.yaml @@ -22,6 +22,8 @@ flavors: # version defined here. # Note that the version numbers here can be found on this page: # https://www.nvidia.com/en-us/drivers/unix/ + # Note also that we have scripts to bump these version numbers that + # depend on the below comment anchors being present, for the time being local_tags: - 'nvidia-590.48.01' # Nvidia: "Latest New Feature Branch Version" - 'nvidia-595.45.04' # Nvidia: "Latest Beta Version" diff --git a/hack/build/refresh-nvidia-versions.py b/hack/build/refresh-nvidia-versions.py new file mode 100755 index 0000000..e453bc2 --- /dev/null +++ b/hack/build/refresh-nvidia-versions.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +"""Rewrite zone-nvidiagpu local_tags in config.yaml with the latest versions +published on https://www.nvidia.com/en-us/drivers/unix/. + +Stdlib-only. Only the version digits in each of +the three matching lines change. If no new versions are found, the file should not be updated. +Currently only supports amd64 drivers. A human must review the PR opened by the GH Action that runs this. 
+""" +import re +import sys +import urllib.request +from pathlib import Path + +NVIDIA_URL = "https://www.nvidia.com/en-us/drivers/unix/" +CONFIG_PATH = Path("config.yaml") + +# The three NVIDIA-page labels we care about. Each label doubles as the literal +# text in the trailing comment of a local_tags line in config.yaml. The script +# matches lines by that comment label, so the order in config.yaml is free. +LABELS = [ + "Latest Production Branch Version", + "Latest New Feature Branch Version", + "Latest Beta Version", +] + +# Only match the Linux x86_64 paragraph for now. +LINUX_X86_64_BLOCK = re.compile( + r"Linux x86_64/AMD64/EM64T(?P
.*?)", + re.DOTALL, +) + + +def fetch_latest_versions(url: str = NVIDIA_URL) -> dict[str, str]: + req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) + with urllib.request.urlopen(req) as resp: + html = resp.read().decode("utf-8", errors="replace") + + block_match = LINUX_X86_64_BLOCK.search(html) + if not block_match: + raise RuntimeError( + "Could not locate the Linux x86_64 block on %s — page layout may have changed." % url + ) + body = block_match.group("body") + + versions = {} + for label in LABELS: + # The page uses `LABEL: VERSION` + # (yes, "calss" — NVIDIA's typo). Match liberally on the label and the + # next ... so the parser survives small markup tweaks. + pat = re.compile( + re.escape(label) + r":\s*\s*]*>([0-9][0-9.]*[0-9])", + re.IGNORECASE, + ) + m = pat.search(body) + if not m: + raise RuntimeError( + "Could not find version for %r in Linux x86_64 block." % label + ) + versions[label] = m.group(1) + return versions + + +# Matches a local_tags line like: +# - 'nvidia-580.126.18' # Nvidia: "Latest Production Branch Version" +# Captures: prefix (everything up to and including the opening quote), +# the version digits, and suffix (closing quote onward). +LINE_RE = re.compile( + r"^(?P