From e57912c4446832c5fada1a1c07fb14b2da5eb615 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 22 May 2026 15:19:42 +0200 Subject: [PATCH 1/3] fix(satellite): bind host /dev directly into satellite pod (Bug 359) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `linstor ps cdp zfs` returned SUCCESS but the resulting StoragePool stayed at `State=Error` with `pool backing storage missing`. Reproduced on e2e3 stand: satellite-side `zpool create` failed deterministically: zpool create -f -O compression=off -O atime=off data /dev/sda: cannot label 'sda': failed to detect device partitions on '/dev/sda1': 19 (ENODEV) Root cause: kubelet hands every privileged container its own private devtmpfs instance for /dev. zpool create stamps the GPT on /dev/sda (kernel creates sda1 + sda9 on the host's devtmpfs), then libzfs immediately open()s /dev/sda1 to write the ZFS label — the inode is not in the container's devtmpfs yet, open() returns ENODEV, the pool is left half-stamped. Bug 346 attempted `mountPropagation: HostToContainer` to slave-mirror host /dev events into the container. That didn't help: rslave updates mount events, not devtmpfs inode visibility for a freshly-mknod'd partition node — and kubelet still allocated a separate devtmpfs. Fix mirrors piraeus's satellite DaemonSet: declare the volume as a plain `hostPath: {path: /dev, type: Directory}` and mount it without mountPropagation. With `type: Directory` kubelet bind-mounts the host's devtmpfs directory directly into the container — same inode table, same partition nodes visible immediately after mknod, no slave-mirror games. Verified against piraeus's working satellite on the same Talos layout (dev5 cluster). No Go-code changes are needed for the mount race; the satellite's exec stays in the container. Bug 359 also surfaces a separate Talos read-only-rootfs issue with `zpool create`'s implicit mkdir — fixed in the follow-up commit (`-m none`). 
Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- stand/blockstor-satellite-daemonset.yaml | 33 +++++++++++------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/stand/blockstor-satellite-daemonset.yaml b/stand/blockstor-satellite-daemonset.yaml index 129e0839..7d470935 100644 --- a/stand/blockstor-satellite-daemonset.yaml +++ b/stand/blockstor-satellite-daemonset.yaml @@ -231,23 +231,20 @@ spec: # a563b1f43) sweeps it on startup. SIGTERM → 10 s → SIGKILL # now reliably tears the pod down inside the rollout budget. volumeMounts: - # Bug 346: mountPropagation HostToContainer. The hostPath - # bind defaults to mountPropagation: None (rprivate), so - # host-side device nodes that appear AFTER container start - # — e.g. /dev/sda1 / /dev/sda9 stamped by `zpool create` - # via the kernel's GPT-rescan — do not propagate into the - # container. libzfs then opens /dev/sda1 to write the ZFS - # label, the inode isn't visible, and zpool aborts with - # `cannot label 'sda': failed to detect device partitions - # on '/dev/sda1': 19` (ENODEV). HostToContainer = MS_SLAVE - # in mount(8) terms: the container receives every host - # mount/inode-creation event under /dev but doesn't push - # any back up — exactly what zpool's partition-rescan - # path needs. Piraeus's satellite happens to work around - # this with sgdisk pre-create + zpool-at-partition-path, - # but blockstor wants the upstream `zpool create /dev/sda` - # one-shot to succeed too. - - {name: dev, mountPath: /dev, mountPropagation: HostToContainer} + # Bug 359: plain bind mount of host /dev — no + # mountPropagation. With `mountPropagation: HostToContainer` + # (Bug 346 attempt) the kubelet still hands the container a + # private devtmpfs instance and the kernel's mknod for + # partition nodes (sda1, sda9) from `zpool create`'s + # GPT-rescan lands in the host's devtmpfs only — libzfs's + # immediate open(/dev/sda1) aborts with ENODEV. 
The piraeus + # satellite ships /dev as a bare `hostPath: {path: /dev, + # type: Directory}` and inherits the host's devtmpfs + # directly (one inode table shared with the host kernel), + # which is what makes `zpool create /dev/sda` survive the + # rescan on the same Talos layout. Mirror that. The + # `type: Directory` lives on the volume definition below. + - {name: dev, mountPath: /dev} - {name: modules, mountPath: /lib/modules, readOnly: true} - {name: lvm-run, mountPath: /run/lvm} - {name: state, mountPath: /var/lib/blockstor-satellite} @@ -324,7 +321,7 @@ spec: # the upstream image + a /etc/drbd-reactor.d ConfigMap mount; # no other DaemonSet plumbing change is needed. volumes: - - {name: dev, hostPath: {path: /dev}} + - {name: dev, hostPath: {path: /dev, type: Directory}} - {name: modules, hostPath: {path: /lib/modules}} - {name: lvm-run, hostPath: {path: /run/lvm, type: DirectoryOrCreate}} - {name: state, hostPath: {path: /var/lib/blockstor-satellite, type: DirectoryOrCreate}} From 77235179d647753e1fb968b885c014ba8930c626 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 22 May 2026 15:19:53 +0200 Subject: [PATCH 2/3] fix(satellite/zfs): zpool create -m none on Talos read-only rootfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Followup to the Bug 359 mount fix. `zpool create` tries to mkdir /{pool} (the pool's default mountpoint) when the new pool is imported. On Talos the host rootfs is read-only outside of a small writable allowlist — mkdir fails with EROFS, `zpool create` returns non-zero, blockstor rolls back the SP CRD even though the pool is already on disk + imported. The next reconcile finds the existing pool and bails with EEXIST, leaving the SP perpetually missing. blockstor uses `zfs create -V` (zvol) datasets only — the root pool mountpoint is never load-bearing. `-m none` tells zpool not to allocate a mountpoint at all, sidestepping the EROFS without losing any function.
Test prefix assertion in pkg/satellite/attach_test.go is unchanged (it pins `zpool create -f` only). Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- pkg/satellite/attach.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/satellite/attach.go b/pkg/satellite/attach.go index a3caeab1..977b07f0 100644 --- a/pkg/satellite/attach.go +++ b/pkg/satellite/attach.go @@ -373,7 +373,16 @@ func attachZFS(ctx context.Context, exec storage.Exec, dev *apiv1.PhysicalDevice return AttachResult{}, errors.New("ZFS attach requires ZPoolName") } + // `-m none` skips the implicit `mkdir /{pool}` zpool tries on + // successful create. On Talos the rootfs is read-only and the + // mkdir fails — but only AFTER the pool has been stamped on + // disk and imported, so the CLI returns non-zero, blockstor + // rolls back the SP CRD, and the next reconcile finds the + // pool already imported and bails with EEXIST. blockstor uses + // `zfs create -V` volume datasets only, so a non-existent + // `/{pool}` mountpoint is never load-bearing. _, err := exec.Run(ctx, "zpool", "create", "-f", + "-m", "none", "-O", "compression=off", "-O", "atime=off", pool, devicePath) From 925d3cd4ac9ee289ace7bb67f98401673d293f7c Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 22 May 2026 15:33:49 +0200 Subject: [PATCH 3/3] =?UTF-8?q?fix(satellite):=20mirror=20piraeus=20?= =?UTF-8?q?=E2=80=94=20hostIPC=20+=20/run/udev=20mount=20(Bug=20359)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two further pieces of the piraeus satellite Pod spec that blockstor's satellite was missing — both relevant to the `ps cdp zfs` failure (Bug 359): 1. `hostIPC: true`. LVM userspace tooling and libzfs use host-wide SysV/POSIX IPC for whole-host coordination (lvmlockd handshakes, zfs.ko's libzpool ↔ /etc/libnvpair shared keys, etc.).
Without hostIPC the satellite owns its own IPC namespace and can race or deadlock against host-side commands that assume the IPC is process-global. Mirrors the `linstor-satellite.nodeN` DaemonSet on the same Talos layout (cozy-linstor namespace). 2. `/run/udev` (ro, hostPath type=Directory). udev's runtime DB lives at /run/udev/data/b{major}:{minor}. libzfs/libblkid query it to look up partition metadata (PARTUUID, fs signatures, holders). Without this mount, the satellite sees an empty DB — partition rescan after `zpool create`'s GPT stamp returns nothing, libzfs treats it as "partition not present" and aborts (matches the ENODEV symptom). The previous /dev mount fix (commit `e57912c44`) made the partition node visible; this one makes the udev metadata about it visible too. Together these complete the parity with piraeus's satellite mount shape — the only remaining differences (var-lib-drbd, /etc/lvm breakout, capabilities allow-list) are not load-bearing for ZFS. Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- stand/blockstor-satellite-daemonset.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/stand/blockstor-satellite-daemonset.yaml b/stand/blockstor-satellite-daemonset.yaml index 7d470935..b4dd6902 100644 --- a/stand/blockstor-satellite-daemonset.yaml +++ b/stand/blockstor-satellite-daemonset.yaml @@ -114,6 +114,13 @@ spec: # from the .res file). Pod IP equals node IP under hostNetwork, # which the satellite Hello flow advertises as its endpoint. hostNetwork: true + # hostIPC mirrors piraeus's satellite. The LVM userland (lvmlockd, + # etc.) and libzfs both rely on host-shared SysV/POSIX IPC for + # whole-host coordination; without hostIPC the satellite has its + # own IPC namespace and can deadlock or skip locks the host-side + # tools assume are global. Same Talos layout where piraeus's + # `linstor-satellite.nodeN` DaemonSet sets this and works.
+ hostIPC: true # ClusterFirstWithHostNet keeps cluster DNS resolution working # under hostNetwork — without it the satellite can't resolve # `blockstor-controller.blockstor-system.svc`. @@ -247,6 +254,14 @@ spec: - {name: dev, mountPath: /dev} - {name: modules, mountPath: /lib/modules, readOnly: true} - {name: lvm-run, mountPath: /run/lvm} + # Bug 359: host's udev runtime DB. libzfs / libblkid look up + # partition metadata (PARTUUID, fs signatures, holders) via + # /run/udev/data/b{major}:{minor}. Without this mount the + # satellite container sees an empty udev DB and partition + # rescan after `zpool create`'s GPT stamp returns no data, + # which libzfs treats as "partition not present" and aborts. + # Piraeus's satellite ships this as ro; we do the same. + - {name: run-udev, mountPath: /run/udev, readOnly: true} - {name: state, mountPath: /var/lib/blockstor-satellite} - {name: pool, mountPath: /var/lib/blockstor-pool} # Bug 310: host-shared /etc/drbd.d/ inside the container, @@ -324,6 +339,7 @@ spec: - {name: dev, hostPath: {path: /dev, type: Directory}} - {name: modules, hostPath: {path: /lib/modules}} - {name: lvm-run, hostPath: {path: /run/lvm, type: DirectoryOrCreate}} + - {name: run-udev, hostPath: {path: /run/udev, type: Directory}} - {name: state, hostPath: {path: /var/lib/blockstor-satellite, type: DirectoryOrCreate}} - {name: pool, hostPath: {path: /var/lib/blockstor-pool, type: DirectoryOrCreate}} # Bug 305: shared with piraeus-satellite (LinstorSatellite-