From 5084376c72b59e6a963eb06128c429e1ca598cf1 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Wed, 11 Feb 2026 14:54:07 +0100 Subject: [PATCH 01/10] Install GRID driver on Azure NV and NVv3 instances --- download_azure_grid_driver.sh | 66 +++++++++++++++++++ precompiled.Dockerfile | 3 + ubuntu22.04/precompiled/nvidia-driver | 93 ++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 download_azure_grid_driver.sh diff --git a/download_azure_grid_driver.sh b/download_azure_grid_driver.sh new file mode 100644 index 00000000..774a93e4 --- /dev/null +++ b/download_azure_grid_driver.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +set -eu + +# GRID_INSTALLER_DIR is provided by Dockerfile ENV +GRID_INSTALLER_DIR=${GRID_INSTALLER_DIR:-/opt/nvidia-grid-install} + +get_grid_azure_url() { + local version="$1" + + # Azure GRID driver version mapping + case "$version" in + 550.144.06*) + echo "https://download.microsoft.com/download/c5319e92-672e-4067-8d85-ab66a7a64db3/NVIDIA-Linux-x86_64-550.144.06-grid-azure.run" + ;; + 550.144.03*) + echo "https://download.microsoft.com/download/c/3/4/c3484f19-fe76-4495-a65d-a5222ead9517/NVIDIA-Linux-x86_64-550.144.03-grid-azure.run" + ;; + 535.161.08*) + echo "https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b-4e6a-bc9a-72ff64d7a13c/NVIDIA-Linux-x86_64-535.161.08-grid-azure.run" + ;; + 535.154.05*) + echo "https://download.microsoft.com/download/1/4/4/14450d0e-a3f2-4b0a-9bb4-a8e729e986c4/NVIDIA-Linux-x86_64-535.154.05-grid-azure.run" + ;; + 535.54.03*) + echo "https://download.microsoft.com/download/2/e/8/2e85b622-d376-4166-be95-38fd60f18eda/NVIDIA-Linux-x86_64-535.54.03-grid-azure.run" + ;; + 525.105.17*) + echo "https://download.microsoft.com/download/6/b/d/6bd2850f-5883-4e2a-9a35-edbd3dd6808c/NVIDIA-Linux-x86_64-525.105.17-grid-azure.run" + ;; + 525.85.05*) + echo "https://download.microsoft.com/download/c/e/9/ce913061-ccf1-4c88-94ff-294e48c55439/NVIDIA-Linux-x86_64-525.85.05-grid-azure.run" + ;; + 525.60.13*) + echo "https://download.microsoft.com/download/1/e/8/1e82a212-9e77-4d74-9455-828d430a39f1/NVIDIA-Linux-x86_64-525.60.13-grid-azure.run" + ;; + *) + echo "" + return 1 + ;; + esac + return 0 +} + +fetch_grid_azure_installer() { + mkdir -p "$GRID_INSTALLER_DIR" + cd "$GRID_INSTALLER_DIR" + + local download_url=$(get_grid_azure_url "$DRIVER_VERSION") + + if [ -z "$download_url" ]; then + echo "ERROR: No Azure GRID driver URL found for version $DRIVER_VERSION" + echo "Available versions: 550.144.06, 550.144.03, 535.161.08, 535.154.05, 535.54.03, 525.105.17, 525.85.05, 525.60.13" + exit 1 + fi + + local filename=$(basename "$download_url") + echo "Downloading GRID driver from: $download_url" + + curl -fSsl -o "$filename" "$download_url" + chmod +x "$filename" + + echo "GRID installer downloaded successfully to $GRID_INSTALLER_DIR/$filename" +} + +fetch_grid_azure_installer diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index 0f1b760e..effbbaad 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -101,6 +101,9 @@ RUN mkdir -p /opt/nvidia-driver/bin COPY ubuntu22.04/precompiled/nvidia-driver /opt/nvidia-driver/bin/nvidia-driver COPY nvidia-driver-wrapper.sh /usr/local/bin/nvidia-driver +ADD download_azure_grid_driver.sh /tmp +RUN /tmp/download_azure_grid_driver.sh + WORKDIR /drivers ENTRYPOINT ["nvidia-driver", "init"] diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index 5471cd46..8e257f96 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -19,6 +19,7 @@ NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} MODPROBE_CONFIG_DIR="/etc/modprobe.d" +GRID_INSTALLER_DIR=${GRID_INSTALLER_DIR:-/opt/nvidia-grid-install} fabricmanager_install() { apt-get install -y --no-install-recommends --no-download nvidia-fabricmanager-${DRIVER_BRANCH}=${FULL_DRIVER_VERSION} @@ -390,7 +391,7 @@ _resolve_kernel_type() { } # Link and install the kernel modules from a precompiled packages -_install_driver() { +_install_precompiled_driver() { # Install necessary driver userspace packages apt-get install -y --no-install-recommends --no-download \ nvidia-utils-${DRIVER_BRANCH}-server=${FULL_DRIVER_VERSION} \ @@ -415,6 +416,96 @@ _install_driver() { fi } +_install_grid_driver() { + echo "Installing NVIDIA GRID driver from Azure package..." + + if [ ! -d "$GRID_INSTALLER_DIR" ]; then + echo "ERROR: GRID installer directory not found: $GRID_INSTALLER_DIR" + exit 1 + fi + + # Find the .run installer file + local installer_file=$(find "$GRID_INSTALLER_DIR" -maxdepth 1 -type f -name "NVIDIA-Linux-*.run" | head -n 1) + + if [ -z "$installer_file" ]; then + echo "ERROR: GRID installer .run file not found in $GRID_INSTALLER_DIR" + exit 1 + fi + + echo "Using GRID installer: $installer_file" + + # Create temporary directory for installer + local tmpdir="$GRID_INSTALLER_DIR/nvidia-grid-tmp" + mkdir -p "$tmpdir" + + # Install GRID driver using the .run installer + # -s (--silent): non-interactive silent mode + # --dkms: use DKMS to build kernel modules + # --tmpdir: specify temporary directory for installation + bash -c "$installer_file -s --dkms --tmpdir $tmpdir" + + local exit_code=$? + + # Clean up temporary directory + rm -rf "$tmpdir" + + if [ $exit_code -ne 0 ]; then + echo "ERROR: GRID driver installation failed with exit code $exit_code" + exit 1 + fi + + # Updating gridd.conf + echo "Creating GRID config" + cp /etc/nvidia/gridd.conf.template /etc/nvidia/gridd.conf + + # Replace EnableUI in place (handles both commented and uncommented) + sed -i 's/^#\?[[:space:]]*EnableUI=.*/EnableUI=FALSE/' /etc/nvidia/gridd.conf + + # Add EnableUI if not present anywhere in the file + grep -q '^EnableUI=' /etc/nvidia/gridd.conf || echo "EnableUI=FALSE" >> /etc/nvidia/gridd.conf + + # Replace IgnoreSP in place (handles both commented and uncommented) + sed -i 's/^#\?[[:space:]]*IgnoreSP=.*/IgnoreSP=FALSE/' /etc/nvidia/gridd.conf + + # Add IgnoreSP if not present anywhere in the file + grep -q '^IgnoreSP=' /etc/nvidia/gridd.conf || echo "IgnoreSP=FALSE" >> /etc/nvidia/gridd.conf + + # Comment out FeatureType if uncommented + sed -i 's/^FeatureType=/#FeatureType=/' /etc/nvidia/gridd.conf + + echo "GRID driver installed successfully" +} + +_is_azure_grid_driver_required() { + # Get Instance type from the The Azure Instance Metadata Service (IMDS). + local instance_type=$(curl -s -H Metadata:true \ + "http://169.254.169.254/metadata/instance/compute/vmSize?api-version=2021-02-01&format=text") + + # List of Azure VM series that require GRID driver + # NV-series: Virtual Desktop Infrastructure (VDI) and remote visualization workloads + local grid_instance_patterns="^(Standard_NV|Standard_NVv3)" + + # Check if instance type matches any of the GRID-required patterns using regex + if [[ "$instance_type" =~ $grid_instance_patterns ]]; then + echo "Detected Azure instance type: $instance_type, that requires GRID driver" + return 0 # GRID driver required + fi + + return 1 # GRID driver not required +} + +_install_driver() { + # Extract kernel name (what comes after the last '-') + local csp_name="${KERNEL_VERSION##*-}" + + # Check if this is an Azure instance and if it requires GRID driver + if [ "$csp_name" = "azure" ] && _is_azure_grid_driver_required; then + _install_grid_driver + else + _install_precompiled_driver + fi +} + # Mount the driver rootfs into the run directory with the exception of sysfs. _mount_rootfs() { echo "Mounting NVIDIA driver rootfs..." From 5bdc758799c636b2a2c8e928f93b15a59d8e085f Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 12 Feb 2026 10:57:48 +0100 Subject: [PATCH 02/10] Use sysfs instead of Azure IMDS --- ubuntu22.04/precompiled/nvidia-driver | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index 8e257f96..b1d38341 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -477,19 +477,19 @@ _install_grid_driver() { } _is_azure_grid_driver_required() { - # Get Instance type from the The Azure Instance Metadata Service (IMDS). - local instance_type=$(curl -s -H Metadata:true \ - "http://169.254.169.254/metadata/instance/compute/vmSize?api-version=2021-02-01&format=text") - - # List of Azure VM series that require GRID driver - # NV-series: Virtual Desktop Infrastructure (VDI) and remote visualization workloads - local grid_instance_patterns="^(Standard_NV|Standard_NVv3)" + # Check for NVIDIA A10 GPU (vendor: 0x10de, device: 0x2236) + # NVIDIA A10 requires GRID driver on Azure + for dev in /sys/bus/pci/devices/*; do + if [ -f "$dev/vendor" ] && [ -f "$dev/device" ]; then + vendor=$(cat "$dev/vendor") + device=$(cat "$dev/device") - # Check if instance type matches any of the GRID-required patterns using regex - if [[ "$instance_type" =~ $grid_instance_patterns ]]; then - echo "Detected Azure instance type: $instance_type, that requires GRID driver" - return 0 # GRID driver required - fi + if [ "$vendor" = "0x10de" ] && [ "$device" = "0x2236" ]; then + echo "Detected NVIDIA A10 GPU at $(basename $dev), GRID driver required" + return 0 # GRID driver required + fi + fi + done return 1 # GRID driver not required } From c05e53f287740b824d538c85ef4bdcd7f7a5df37 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 12 Feb 2026 11:55:05 +0100 Subject: [PATCH 03/10] Modify download script accept driver version as argument --- download_azure_grid_driver.sh | 24 ++++++++++++++++++++---- precompiled.Dockerfile | 4 +++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/download_azure_grid_driver.sh b/download_azure_grid_driver.sh index 774a93e4..22641aeb 100644 --- a/download_azure_grid_driver.sh +++ b/download_azure_grid_driver.sh @@ -5,6 +5,14 @@ set -eu # GRID_INSTALLER_DIR is provided by Dockerfile ENV GRID_INSTALLER_DIR=${GRID_INSTALLER_DIR:-/opt/nvidia-grid-install} +# Available Azure GRID driver versions +AVAILABLE_VERSIONS="550.144.06, 550.144.03, 535.161.08, 535.154.05, 535.54.03, 525.105.17, 525.85.05, 525.60.13" + +print_usage() { + echo "Usage: $0 " + echo "Available versions: $AVAILABLE_VERSIONS" +} + get_grid_azure_url() { local version="$1" @@ -43,14 +51,22 @@ get_grid_azure_url() { } fetch_grid_azure_installer() { + local driver_version="$1" + + if [ -z "$driver_version" ]; then + echo "ERROR: Driver version must be provided as an argument" + print_usage + exit 1 + fi + mkdir -p "$GRID_INSTALLER_DIR" cd "$GRID_INSTALLER_DIR" - local download_url=$(get_grid_azure_url "$DRIVER_VERSION") + local download_url=$(get_grid_azure_url "$driver_version") if [ -z "$download_url" ]; then - echo "ERROR: No Azure GRID driver URL found for version $DRIVER_VERSION" - echo "Available versions: 550.144.06, 550.144.03, 535.161.08, 535.154.05, 535.54.03, 525.105.17, 525.85.05, 525.60.13" + echo "ERROR: No Azure GRID driver URL found for version $driver_version" + print_usage exit 1 fi @@ -63,4 +79,4 @@ fetch_grid_azure_installer() { echo "GRID installer downloaded successfully to $GRID_INSTALLER_DIR/$filename" } -fetch_grid_azure_installer +fetch_grid_azure_installer "$@" diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index effbbaad..cfd34990 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -102,7 +102,9 @@ COPY ubuntu22.04/precompiled/nvidia-driver /opt/nvidia-driver/bin/nvidia-driver COPY nvidia-driver-wrapper.sh /usr/local/bin/nvidia-driver ADD download_azure_grid_driver.sh /tmp -RUN /tmp/download_azure_grid_driver.sh +# TODO: Azure support only several GRID driver versions. Temporary hardcode the version. +# RUN . /versions.env && /tmp/download_azure_grid_driver.sh "$DRIVER_VERSION" +RUN /tmp/download_azure_grid_driver.sh "535.161.08" WORKDIR /drivers From 86cfb3f30609351445d88e3e4958d5cab34c1911 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 12 Feb 2026 12:27:32 +0100 Subject: [PATCH 04/10] Add executable permission to the download script --- download_azure_grid_driver.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 download_azure_grid_driver.sh diff --git a/download_azure_grid_driver.sh b/download_azure_grid_driver.sh old mode 100644 new mode 100755 From d29ff40ddc0c75d5c74a09452f04ff9704e72625 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 16 Feb 2026 18:25:58 +0100 Subject: [PATCH 05/10] Install kernel headers before running the GRID installer --- precompiled.Dockerfile | 13 +++++++++++++ ubuntu22.04/precompiled/nvidia-driver | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index cfd34990..62420b45 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -97,6 +97,19 @@ RUN . /versions.env && \ DEP_PACKAGES=$(apt-rdepends $BASE_PACKAGES_NAMES | grep -v "^ " | grep -v "^debconf-2.0$" | grep -v "^linux-image-unsigned-") && \ apt-get install -y --download-only --no-install-recommends --reinstall $BASE_PACKAGES $DEP_PACKAGES +# Remove cuda repository to avoid GPG errors +# Remove cuda repository before downloading dkms to avoid version conflicts +# CUDA repo has dkms 1:3.3.0 but Ubuntu has 2.8.7 - we need Ubuntu version for runtime +RUN rm -f /etc/apt/sources.list.d/cuda* && apt-get update + +# Download kernel headers, dkms, and ALL their dependencies for GRID driver support +# This ensures runtime installation with --no-download will succeed +# Note: We must download gcc-12 explicitly as runtime may prefer it over installed gcc-11 +RUN . /versions.env && \ + apt-get install -y --download-only --no-install-recommends \ + linux-headers-${KERNEL_VERSION} \ + dkms + RUN mkdir -p /opt/nvidia-driver/bin COPY ubuntu22.04/precompiled/nvidia-driver /opt/nvidia-driver/bin/nvidia-driver COPY nvidia-driver-wrapper.sh /usr/local/bin/nvidia-driver diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index b1d38341..f80f39c7 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -434,6 +434,12 @@ _install_grid_driver() { echo "Using GRID installer: $installer_file" + # Install kernel headers required for DKMS + echo "Installing kernel headers for ${KERNEL_VERSION}..." + apt-get install --no-install-recommends --no-download -y \ + linux-headers-${KERNEL_VERSION} \ + dkms + # Create temporary directory for installer local tmpdir="$GRID_INSTALLER_DIR/nvidia-grid-tmp" mkdir -p "$tmpdir" From a8881110de74d55a93b4544573313cb4ef409b1d Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Tue, 17 Feb 2026 15:40:15 +0100 Subject: [PATCH 06/10] Fix issue with __acpi_video_get_backlight_type symbol not found --- precompiled.Dockerfile | 8 ++++---- ubuntu22.04/precompiled/nvidia-driver | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index 62420b45..402e40af 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -102,12 +102,12 @@ RUN . /versions.env && \ # CUDA repo has dkms 1:3.3.0 but Ubuntu has 2.8.7 - we need Ubuntu version for runtime RUN rm -f /etc/apt/sources.list.d/cuda* && apt-get update -# Download kernel headers, dkms, and ALL their dependencies for GRID driver support -# This ensures runtime installation with --no-download will succeed -# Note: We must download gcc-12 explicitly as runtime may prefer it over installed gcc-11 +# Download kernel headers, dkms, linux-modules (for video.ko) for GRID driver support +# linux-modules contains video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol RUN . /versions.env && \ apt-get install -y --download-only --no-install-recommends \ linux-headers-${KERNEL_VERSION} \ + linux-modules-${KERNEL_VERSION} \ dkms RUN mkdir -p /opt/nvidia-driver/bin @@ -117,7 +117,7 @@ COPY nvidia-driver-wrapper.sh /usr/local/bin/nvidia-driver ADD download_azure_grid_driver.sh /tmp # TODO: Azure support only several GRID driver versions. Temporary hardcode the version. # RUN . /versions.env && /tmp/download_azure_grid_driver.sh "$DRIVER_VERSION" -RUN /tmp/download_azure_grid_driver.sh "535.161.08" +RUN /tmp/download_azure_grid_driver.sh "550.144.06" WORKDIR /drivers diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index f80f39c7..b85d8dd2 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -434,10 +434,12 @@ _install_grid_driver() { echo "Using GRID installer: $installer_file" - # Install kernel headers required for DKMS - echo "Installing kernel headers for ${KERNEL_VERSION}..." + # Install kernel headers and modules required for DKMS + # linux-modules provides video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol + echo "Installing kernel headers and modules for ${KERNEL_VERSION}..." apt-get install --no-install-recommends --no-download -y \ linux-headers-${KERNEL_VERSION} \ + linux-modules-${KERNEL_VERSION} \ dkms # Create temporary directory for installer @@ -446,8 +448,9 @@ _install_grid_driver() { # Install GRID driver using the .run installer # -s (--silent): non-interactive silent mode - # --dkms: use DKMS to build kernel modules + # --dkms: use DKMS to build and load kernel modules automatically # --tmpdir: specify temporary directory for installation + # Note: GRID drivers do not support --skip-module-load option bash -c "$installer_file -s --dkms --tmpdir $tmpdir" local exit_code=$? From 3e1efa4f5dd55652946963e829d8e1b162c7a080 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 19 Feb 2026 12:56:26 +0100 Subject: [PATCH 07/10] Minor refactoring of function names --- ubuntu22.04/precompiled/nvidia-driver | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index b85d8dd2..2a6b27ce 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -485,7 +485,7 @@ _install_grid_driver() { echo "GRID driver installed successfully" } -_is_azure_grid_driver_required() { +_has_nvidia_a10_gpu() { # Check for NVIDIA A10 GPU (vendor: 0x10de, device: 0x2236) # NVIDIA A10 requires GRID driver on Azure for dev in /sys/bus/pci/devices/*; do @@ -495,20 +495,28 @@ _is_azure_grid_driver_required() { if [ "$vendor" = "0x10de" ] && [ "$device" = "0x2236" ]; then echo "Detected NVIDIA A10 GPU at $(basename $dev), GRID driver required" - return 0 # GRID driver required + return 0 # A10 GPU present fi fi done - return 1 # GRID driver not required + return 1 # A10 GPU not present } -_install_driver() { +_is_grid_driver_required() { # Extract kernel name (what comes after the last '-') local csp_name="${KERNEL_VERSION##*-}" - # Check if this is an Azure instance and if it requires GRID driver - if [ "$csp_name" = "azure" ] && _is_azure_grid_driver_required; then + # Check if this is an Azure instance with NVidia A10 GPU + if [ "$csp_name" = "azure" ] && _has_nvidia_a10_gpu; then + return 0 # GRID driver required + fi + + return 1 # GRID driver not required +} + +_install_driver() { + if _is_grid_driver_required; then _install_grid_driver else _install_precompiled_driver From dbea80fbb8235f5013960dacc8ea5857f455b377 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 19 Feb 2026 17:14:48 +0100 Subject: [PATCH 08/10] Fix installer for precompiled driver --- precompiled.Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index 402e40af..9c38838b 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -97,10 +97,11 @@ RUN . /versions.env && \ DEP_PACKAGES=$(apt-rdepends $BASE_PACKAGES_NAMES | grep -v "^ " | grep -v "^debconf-2.0$" | grep -v "^linux-image-unsigned-") && \ apt-get install -y --download-only --no-install-recommends --reinstall $BASE_PACKAGES $DEP_PACKAGES -# Remove cuda repository to avoid GPG errors # Remove cuda repository before downloading dkms to avoid version conflicts # CUDA repo has dkms 1:3.3.0 but Ubuntu has 2.8.7 - we need Ubuntu version for runtime -RUN rm -f /etc/apt/sources.list.d/cuda* && apt-get update +# Note: We remove repo files but don't run apt-get update to preserve package cache +# for runtime installation of precompiled driver packages +RUN rm -f /etc/apt/sources.list.d/cuda* # Download kernel headers, dkms, linux-modules (for video.ko) for GRID driver support # linux-modules contains video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol From 92c969201ad1e73fc727b632ed7c94767846c4d9 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 23 Feb 2026 11:11:20 +0100 Subject: [PATCH 09/10] Address review comments --- download_azure_grid_driver.sh | 17 +---- precompiled.Dockerfile | 30 ++++---- ubuntu22.04/precompiled/grid-driver | 103 ++++++++++++++++++++++++++ ubuntu22.04/precompiled/nvidia-driver | 102 +------------------------ 4 files changed, 120 insertions(+), 132 deletions(-) create mode 100644 ubuntu22.04/precompiled/grid-driver diff --git a/download_azure_grid_driver.sh b/download_azure_grid_driver.sh index 22641aeb..507791d1 100755 --- a/download_azure_grid_driver.sh +++ b/download_azure_grid_driver.sh @@ -6,7 +6,7 @@ set -eu GRID_INSTALLER_DIR=${GRID_INSTALLER_DIR:-/opt/nvidia-grid-install} # Available Azure GRID driver versions -AVAILABLE_VERSIONS="550.144.06, 550.144.03, 535.161.08, 535.154.05, 535.54.03, 525.105.17, 525.85.05, 525.60.13" +AVAILABLE_VERSIONS="550.144.06, 535.161.08, 525.105.17" print_usage() { echo "Usage: $0 " @@ -21,27 +21,12 @@ get_grid_azure_url() { 550.144.06*) echo "https://download.microsoft.com/download/c5319e92-672e-4067-8d85-ab66a7a64db3/NVIDIA-Linux-x86_64-550.144.06-grid-azure.run" ;; - 550.144.03*) - echo "https://download.microsoft.com/download/c/3/4/c3484f19-fe76-4495-a65d-a5222ead9517/NVIDIA-Linux-x86_64-550.144.03-grid-azure.run" - ;; 535.161.08*) echo "https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b-4e6a-bc9a-72ff64d7a13c/NVIDIA-Linux-x86_64-535.161.08-grid-azure.run" ;; - 535.154.05*) - echo "https://download.microsoft.com/download/1/4/4/14450d0e-a3f2-4b0a-9bb4-a8e729e986c4/NVIDIA-Linux-x86_64-535.154.05-grid-azure.run" - ;; - 535.54.03*) - echo "https://download.microsoft.com/download/2/e/8/2e85b622-d376-4166-be95-38fd60f18eda/NVIDIA-Linux-x86_64-535.54.03-grid-azure.run" - ;; 525.105.17*) echo "https://download.microsoft.com/download/6/b/d/6bd2850f-5883-4e2a-9a35-edbd3dd6808c/NVIDIA-Linux-x86_64-525.105.17-grid-azure.run" ;; - 525.85.05*) - echo "https://download.microsoft.com/download/c/e/9/ce913061-ccf1-4c88-94ff-294e48c55439/NVIDIA-Linux-x86_64-525.85.05-grid-azure.run" - ;; - 525.60.13*) - echo "https://download.microsoft.com/download/1/e/8/1e82a212-9e77-4d74-9455-828d430a39f1/NVIDIA-Linux-x86_64-525.60.13-grid-azure.run" - ;; *) echo "" return 1 diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index 9c38838b..42abf168 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -97,29 +97,27 @@ RUN . /versions.env && \ DEP_PACKAGES=$(apt-rdepends $BASE_PACKAGES_NAMES | grep -v "^ " | grep -v "^debconf-2.0$" | grep -v "^linux-image-unsigned-") && \ apt-get install -y --download-only --no-install-recommends --reinstall $BASE_PACKAGES $DEP_PACKAGES -# Remove cuda repository before downloading dkms to avoid version conflicts -# CUDA repo has dkms 1:3.3.0 but Ubuntu has 2.8.7 - we need Ubuntu version for runtime -# Note: We remove repo files but don't run apt-get update to preserve package cache -# for runtime installation of precompiled driver packages -RUN rm -f /etc/apt/sources.list.d/cuda* - -# Download kernel headers, dkms, linux-modules (for video.ko) for GRID driver support +# Download GRID driver and its dependencie: kernel headers, dkms, linux-modules (for video.ko) — Azure only +COPY download_azure_grid_driver.sh /tmp # linux-modules contains video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol +# Note: cuda repo is removed (not updated) to preserve package cache while avoiding dkms version conflicts +# (CUDA repo has dkms 1:3.3.0 but Ubuntu has 2.8.7 — we need the Ubuntu version for runtime) +# TODO: Azure supports only several GRID driver versions. Temporary hardcode the version. RUN . /versions.env && \ - apt-get install -y --download-only --no-install-recommends \ - linux-headers-${KERNEL_VERSION} \ - linux-modules-${KERNEL_VERSION} \ - dkms + if [ "${KERNEL_VERSION##*-}" = "azure" ]; then \ + rm -f /etc/apt/sources.list.d/cuda* && \ + apt-get install -y --download-only --no-install-recommends \ + linux-headers-${KERNEL_VERSION} \ + linux-modules-${KERNEL_VERSION} \ + dkms && \ + /tmp/download_azure_grid_driver.sh "550.144.06"; \ + fi RUN mkdir -p /opt/nvidia-driver/bin COPY ubuntu22.04/precompiled/nvidia-driver /opt/nvidia-driver/bin/nvidia-driver +COPY ubuntu22.04/precompiled/grid-driver /opt/nvidia-driver/bin/grid-driver COPY nvidia-driver-wrapper.sh /usr/local/bin/nvidia-driver -ADD download_azure_grid_driver.sh /tmp -# TODO: Azure support only several GRID driver versions. Temporary hardcode the version. -# RUN . /versions.env && /tmp/download_azure_grid_driver.sh "$DRIVER_VERSION" -RUN /tmp/download_azure_grid_driver.sh "550.144.06" - WORKDIR /drivers ENTRYPOINT ["nvidia-driver", "init"] diff --git a/ubuntu22.04/precompiled/grid-driver b/ubuntu22.04/precompiled/grid-driver new file mode 100644 index 00000000..176b0079 --- /dev/null +++ b/ubuntu22.04/precompiled/grid-driver @@ -0,0 +1,103 @@ +#! /bin/bash + +GRID_INSTALLER_DIR=${GRID_INSTALLER_DIR:-/opt/nvidia-grid-install} + +_install_grid_driver() { + echo "Installing NVIDIA GRID driver from Azure package..." + + if [ ! -d "$GRID_INSTALLER_DIR" ]; then + echo "ERROR: GRID installer directory not found: $GRID_INSTALLER_DIR" + exit 1 + fi + + # Find the .run installer file + local installer_file=$(find "$GRID_INSTALLER_DIR" -maxdepth 1 -type f -name "NVIDIA-Linux-*.run" | head -n 1) + + if [ -z "$installer_file" ]; then + echo "ERROR: GRID installer .run file not found in $GRID_INSTALLER_DIR" + exit 1 + fi + + echo "Using GRID installer: $installer_file" + + # Install kernel headers and modules required for DKMS + # linux-modules provides video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol + echo "Installing kernel headers and modules for ${KERNEL_VERSION}..." + apt-get install --no-install-recommends --no-download -y \ + linux-headers-${KERNEL_VERSION} \ + linux-modules-${KERNEL_VERSION} \ + dkms + + # Create temporary directory for installer + local tmpdir="$GRID_INSTALLER_DIR/nvidia-grid-tmp" + mkdir -p "$tmpdir" + + # Install GRID driver using the .run installer + # -s (--silent): non-interactive silent mode + # --dkms: use DKMS to build and load kernel modules automatically + # --tmpdir: specify temporary directory for installation + # Note: GRID drivers do not support --skip-module-load option + bash -c "$installer_file -s --dkms --tmpdir $tmpdir" + + local exit_code=$? + + # Clean up temporary directory + rm -rf "$tmpdir" + + if [ $exit_code -ne 0 ]; then + echo "ERROR: GRID driver installation failed with exit code $exit_code" + exit 1 + fi + + # Updating gridd.conf as required for Azure NV/NVv3 VMs. + # See: https://learn.microsoft.com/en-us/azure/virtual-machines/linux/n-series-driver-setup#install-grid-drivers-on-nv-or-nvv3-series-vms + echo "Creating GRID config" + cp /etc/nvidia/gridd.conf.template /etc/nvidia/gridd.conf + + # Replace EnableUI in place (handles both commented and uncommented) + sed -i 's/^#\?[[:space:]]*EnableUI=.*/EnableUI=FALSE/' /etc/nvidia/gridd.conf + + # Add EnableUI if not present anywhere in the file + grep -q '^EnableUI=' /etc/nvidia/gridd.conf || echo "EnableUI=FALSE" >> /etc/nvidia/gridd.conf + + # Replace IgnoreSP in place (handles both commented and uncommented) + sed -i 's/^#\?[[:space:]]*IgnoreSP=.*/IgnoreSP=FALSE/' /etc/nvidia/gridd.conf + + # Add IgnoreSP if not present anywhere in the file + grep -q '^IgnoreSP=' /etc/nvidia/gridd.conf || echo "IgnoreSP=FALSE" >> /etc/nvidia/gridd.conf + + # Comment out FeatureType if uncommented + sed -i 's/^FeatureType=/#FeatureType=/' /etc/nvidia/gridd.conf + + echo "GRID driver installed successfully" +} + +_has_nvidia_a10_gpu() { + # Check for NVIDIA A10 GPU (vendor: 0x10de, device: 0x2236) + # NVIDIA A10 requires GRID driver on Azure + for dev in /sys/bus/pci/devices/*; do + if [ -f "$dev/vendor" ] && [ -f "$dev/device" ]; then + vendor=$(cat "$dev/vendor") + device=$(cat "$dev/device") + + if [ "$vendor" = "0x10de" ] && [ "$device" = "0x2236" ]; then + echo "Detected NVIDIA A10 GPU at $(basename $dev), GRID driver required" + return 0 # A10 GPU present + fi + fi + done + + return 1 # A10 GPU not present +} + +_is_grid_driver_required() { + # Extract CSP name from kernel version (e.g. "azure" from "5.15.0-1040-azure") + local csp_name="${KERNEL_VERSION##*-}" + + # Check if this is an Azure instance with NVidia A10 GPU + if [ "$csp_name" = "azure" ] && _has_nvidia_a10_gpu; then + return 0 # GRID driver required + fi + + return 1 # GRID driver not required +} diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index 2a6b27ce..697b62ca 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -19,7 +19,8 @@ NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} MODPROBE_CONFIG_DIR="/etc/modprobe.d" -GRID_INSTALLER_DIR=${GRID_INSTALLER_DIR:-/opt/nvidia-grid-install} + +source "$(dirname "$0")/grid-driver" fabricmanager_install() { apt-get install -y --no-install-recommends --no-download nvidia-fabricmanager-${DRIVER_BRANCH}=${FULL_DRIVER_VERSION} @@ -416,105 +417,6 @@ _install_precompiled_driver() { fi } -_install_grid_driver() { - echo "Installing NVIDIA GRID driver from Azure package..." - - if [ ! -d "$GRID_INSTALLER_DIR" ]; then - echo "ERROR: GRID installer directory not found: $GRID_INSTALLER_DIR" - exit 1 - fi - - # Find the .run installer file - local installer_file=$(find "$GRID_INSTALLER_DIR" -maxdepth 1 -type f -name "NVIDIA-Linux-*.run" | head -n 1) - - if [ -z "$installer_file" ]; then - echo "ERROR: GRID installer .run file not found in $GRID_INSTALLER_DIR" - exit 1 - fi - - echo "Using GRID installer: $installer_file" - - # Install kernel headers and modules required for DKMS - # linux-modules provides video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol - echo "Installing kernel headers and modules for ${KERNEL_VERSION}..." - apt-get install --no-install-recommends --no-download -y \ - linux-headers-${KERNEL_VERSION} \ - linux-modules-${KERNEL_VERSION} \ - dkms - - # Create temporary directory for installer - local tmpdir="$GRID_INSTALLER_DIR/nvidia-grid-tmp" - mkdir -p "$tmpdir" - - # Install GRID driver using the .run installer - # -s (--silent): non-interactive silent mode - # --dkms: use DKMS to build and load kernel modules automatically - # --tmpdir: specify temporary directory for installation - # Note: GRID drivers do not support --skip-module-load option - bash -c "$installer_file -s --dkms --tmpdir $tmpdir" - - local exit_code=$? - - # Clean up temporary directory - rm -rf "$tmpdir" - - if [ $exit_code -ne 0 ]; then - echo "ERROR: GRID driver installation failed with exit code $exit_code" - exit 1 - fi - - # Updating gridd.conf - echo "Creating GRID config" - cp /etc/nvidia/gridd.conf.template /etc/nvidia/gridd.conf - - # Replace EnableUI in place (handles both commented and uncommented) - sed -i 's/^#\?[[:space:]]*EnableUI=.*/EnableUI=FALSE/' /etc/nvidia/gridd.conf - - # Add EnableUI if not present anywhere in the file - grep -q '^EnableUI=' /etc/nvidia/gridd.conf || echo "EnableUI=FALSE" >> /etc/nvidia/gridd.conf - - # Replace IgnoreSP in place (handles both commented and uncommented) - sed -i 's/^#\?[[:space:]]*IgnoreSP=.*/IgnoreSP=FALSE/' /etc/nvidia/gridd.conf - - # Add IgnoreSP if not present anywhere in the file - grep -q '^IgnoreSP=' /etc/nvidia/gridd.conf || echo "IgnoreSP=FALSE" >> /etc/nvidia/gridd.conf - - # Comment out FeatureType if uncommented - sed -i 's/^FeatureType=/#FeatureType=/' /etc/nvidia/gridd.conf - - echo "GRID driver installed successfully" -} - -_has_nvidia_a10_gpu() { - # Check for NVIDIA A10 GPU (vendor: 0x10de, device: 0x2236) - # NVIDIA A10 requires GRID driver on Azure - for dev in /sys/bus/pci/devices/*; do - if [ -f "$dev/vendor" ] && [ -f "$dev/device" ]; then - vendor=$(cat "$dev/vendor") - device=$(cat "$dev/device") - - if [ "$vendor" = "0x10de" ] && [ "$device" = "0x2236" ]; then - echo "Detected NVIDIA A10 GPU at $(basename $dev), GRID driver required" - return 0 # A10 GPU present - fi - fi - done - - return 1 # A10 GPU not present -} - -_is_grid_driver_required() { - # Extract kernel name (what comes after the last '-') - local csp_name="${KERNEL_VERSION##*-}" - - # Check if this is an Azure instance with NVidia A10 GPU - if [ "$csp_name" = "azure" ] && _has_nvidia_a10_gpu; then - return 0 # GRID driver required - fi - - return 1 # GRID driver not required -} - _install_driver() { if _is_grid_driver_required; then _install_grid_driver From 1bb9b0b9fd8258171120f433cbcf71e4d20ce955 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 23 Feb 2026 12:50:18 +0100 Subject: [PATCH 10/10] Use dkms from the cuda package repo --- precompiled.Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/precompiled.Dockerfile b/precompiled.Dockerfile index 42abf168..2b6755b0 100644 --- a/precompiled.Dockerfile +++ b/precompiled.Dockerfile @@ -100,12 +100,9 @@ RUN . /versions.env && \ # Download GRID driver and its dependencie: kernel headers, dkms, linux-modules (for video.ko) — Azure only COPY download_azure_grid_driver.sh /tmp # linux-modules contains video.ko which nvidia-modeset depends on for __acpi_video_get_backlight_type symbol -# Note: cuda repo is removed (not updated) to preserve package cache while avoiding dkms version conflicts -# (CUDA repo has dkms 1:3.3.0 but Ubuntu has 2.8.7 — we need the Ubuntu version for runtime) # TODO: Azure supports only several GRID driver versions. Temporary hardcode the version. RUN . /versions.env && \ if [ "${KERNEL_VERSION##*-}" = "azure" ]; then \ - rm -f /etc/apt/sources.list.d/cuda* && \ apt-get install -y --download-only --no-install-recommends \ linux-headers-${KERNEL_VERSION} \ linux-modules-${KERNEL_VERSION} \