From 91d76b121d5278c2a310f56a1b038fc6b833b868 Mon Sep 17 00:00:00 2001
From: John Myers
Date: Fri, 29 May 2026 14:53:50 -0700
Subject: [PATCH] fix(docker): include z3 runtime in gateway image

---
Reviewer notes (free-form text in this position is not part of the commit):

  * Dockerfile.gateway gains a `z3-runtime` stage built from
    `Z3_PACKAGE_IMAGE` (default: digest-pinned `debian:trixie-slim`). It
    installs `libz3-4` and stages `/usr/lib/*-linux-gnu/libz3.so.4` at
    `/z3/libz3.so.4`.
  * The `gateway` stage copies that file to `/usr/lib/libz3.so.4` and sets
    `LD_LIBRARY_PATH=/usr/lib` in the image environment.
  * SKILL.md gains a troubleshooting bullet for the
    `libz3.so.4: cannot open shared object file` failure.
  * NOTE(review): `libz3-4` is installed without a version pin, so the copied
    library follows whatever the base image's apt sources serve at build
    time - confirm this is acceptable.
  * NOTE(review): `ENV LD_LIBRARY_PATH` persists into the running container;
    if the loader in the distroless base already searches `/usr/lib`, it may
    be unnecessary - TODO confirm before dropping.

 .agents/skills/debug-openshell-cluster/SKILL.md |  1 +
 deploy/docker/Dockerfile.gateway                | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md
index 6c8f73bb6..de90169a4 100644
--- a/.agents/skills/debug-openshell-cluster/SKILL.md
+++ b/.agents/skills/debug-openshell-cluster/SKILL.md
@@ -100,6 +100,7 @@ Common findings:
 - Gateway process stopped: inspect exit status and logs.
 - Sandbox image missing or pull denied: verify image reference and registry credentials.
 - Docker driver cannot initialize because it cannot find `openshell-sandbox`: verify `OPENSHELL_DOCKER_SUPERVISOR_BIN`, the sibling binary next to `openshell-gateway`, or the configured supervisor image contains `/openshell-sandbox`.
+- Gateway image exits before printing `openshell-gateway --version` with `libz3.so.4: cannot open shared object file`: the gateway binary was built against system Z3 but the runtime image did not include the matching Z3 shared library. The gateway Dockerfile should copy `libz3.so.4` from the Debian 13 package stage into the distroless runtime image.
 - Sandbox never registers: check gateway logs and supervisor callback endpoint.
 - Supervisor image exits before printing `openshell-sandbox --version`: the image should be the scratch supervisor image from `deploy/docker/Dockerfile.supervisor` and must contain a static executable at `/openshell-sandbox`.
 - `mise run e2e:docker:gpu` fails with `docker info --format json did not report any discovered NVIDIA CDI GPU devices`: Docker may report `CDISpecDirs` while still having no generated NVIDIA CDI specs. Verify `.DiscoveredDevices` contains entries such as `nvidia.com/gpu=all`, verify `/etc/cdi` or `/var/run/cdi` contains a generated NVIDIA spec, and check that `nvidia-cdi-refresh.service` and `nvidia-cdi-refresh.path` from NVIDIA Container Toolkit are enabled and healthy. The service is a one-shot unit, so `inactive (dead)` can be normal after a successful run; use `systemctl status` and `journalctl` to distinguish success from a skipped or failed refresh. NVIDIA recommends enabling the path and service units, and restarting `nvidia-cdi-refresh.service` to regenerate missing or stale CDI specs. If specs are generated but Docker still reports no discovered devices, restart Docker or reload the daemon and re-check `docker info`.
diff --git a/deploy/docker/Dockerfile.gateway b/deploy/docker/Dockerfile.gateway
index 9dd7ed8b9..1c9a987cb 100644
--- a/deploy/docker/Dockerfile.gateway
+++ b/deploy/docker/Dockerfile.gateway
@@ -16,10 +16,20 @@
 #
 # The runtime is distroless Debian 13, which provides glibc and the dynamic
 # loader needed by the GNU-linked gateway binary while keeping the attack
-# surface small. The default digest currently carries Debian glibc
-# 2.41-12+deb13u3.
+# surface small. The gateway links to libz3 through openshell-prover, so copy
+# the matching Debian runtime library from a Debian 13 package stage.
+# The default distroless digest currently carries Debian glibc 2.41-12+deb13u3.
 
 ARG GATEWAY_BASE_IMAGE=gcr.io/distroless/cc-debian13:nonroot@sha256:e1fd250ce83d94603e9887ec991156a6c26905a6b0001039b7a43699018c0733
+ARG Z3_PACKAGE_IMAGE=debian:trixie-slim@sha256:b6e2a152f22a40ff69d92cb397223c906017e1391a73c952b588e51af8883bf8
+
+FROM ${Z3_PACKAGE_IMAGE} AS z3-runtime
+
+RUN DEBIAN_FRONTEND=noninteractive apt-get update \
+    && apt-get install -y --no-install-recommends libz3-4 \
+    && mkdir -p /z3 \
+    && cp /usr/lib/*-linux-gnu/libz3.so.4 /z3/libz3.so.4 \
+    && rm -rf /var/lib/apt/lists/*
 
 FROM ${GATEWAY_BASE_IMAGE} AS gateway
 
@@ -27,8 +37,11 @@ ARG TARGETARCH
 
 WORKDIR /app
 
+COPY --from=z3-runtime /z3/libz3.so.4 /usr/lib/libz3.so.4
 COPY deploy/docker/.build/prebuilt-binaries/${TARGETARCH}/openshell-gateway /usr/local/bin/openshell-gateway
 
+ENV LD_LIBRARY_PATH=/usr/lib
+
 USER 1000:1000
 
 EXPOSE 8080