From 723e1326faa3653ffe00dbce46a48bb2045489cd Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Fri, 8 May 2026 12:38:37 +0000 Subject: [PATCH 1/8] Switch from modular virtqemud to monolithic libvirtd The modular virtqemud daemon does not support TCP connections natively. Switch to monolithic libvirtd with socket activation via the built-in libvirtd-tcp.socket unit for direct TCP access from the host. Assisted-by: Claude --- containerfiles/cluster-image/Containerfile | 14 ++++++++------ .../cluster-image/libvirtd-override.conf | 2 ++ containerfiles/cluster-image/libvirtd.conf | 2 ++ .../cluster-image/virtqemud-override.conf | 2 -- containerfiles/cluster-image/virtqemud.conf | 3 --- 5 files changed, 12 insertions(+), 11 deletions(-) create mode 100644 containerfiles/cluster-image/libvirtd-override.conf create mode 100644 containerfiles/cluster-image/libvirtd.conf delete mode 100644 containerfiles/cluster-image/virtqemud-override.conf delete mode 100644 containerfiles/cluster-image/virtqemud.conf diff --git a/containerfiles/cluster-image/Containerfile b/containerfiles/cluster-image/Containerfile index 83af529..d8a357c 100644 --- a/containerfiles/cluster-image/Containerfile +++ b/containerfiles/cluster-image/Containerfile @@ -19,20 +19,22 @@ RUN dnf install -y --setopt=install_weak_deps=0 \ && dnf clean all COPY qemu.conf /etc/libvirt/qemu.conf -COPY virtqemud.conf /etc/libvirt/virtqemud.conf +COPY libvirtd.conf /etc/libvirt/libvirtd.conf COPY virtiofsd-wrapper /usr/local/bin/virtiofsd-wrapper COPY virtiofsd.service /etc/systemd/system/virtiofsd.service -RUN mkdir -p /etc/systemd/system/virtqemud.service.d -COPY virtqemud-override.conf /etc/systemd/system/virtqemud.service.d/override.conf +RUN mkdir -p /etc/systemd/system/libvirtd.service.d +COPY libvirtd-override.conf /etc/systemd/system/libvirtd.service.d/override.conf RUN chmod +x /usr/local/bin/virtiofsd-wrapper RUN mkdir -p /home/qemu && chown -R qemu:qemu /home/qemu RUN echo 'root:100000:65536' > /etc/subuid && \ echo 'root:100000:65536' > /etc/subgid -RUN systemctl enable virtqemud.service virtlogd.service virtstoraged.service \ - virtnetworkd.service virtiofsd.service && \ - systemctl mask systemd-logind.service getty.target console-getty.service +RUN systemctl enable libvirtd.socket libvirtd-tcp.socket \ + virtlogd.service virtiofsd.service && \ + systemctl mask virtqemud.service virtqemud.socket \ + virtproxyd.service virtproxyd.socket virtproxyd-ro.socket virtproxyd-admin.socket \ + systemd-logind.service getty.target console-getty.service STOPSIGNAL SIGRTMIN+3 ENTRYPOINT ["/sbin/init"] diff --git a/containerfiles/cluster-image/libvirtd-override.conf b/containerfiles/cluster-image/libvirtd-override.conf new file mode 100644 index 0000000..a6d8221 --- /dev/null +++ b/containerfiles/cluster-image/libvirtd-override.conf @@ -0,0 +1,2 @@ +[Service] +Environment=LIBVIRTD_ARGS="--timeout 0" diff --git a/containerfiles/cluster-image/libvirtd.conf b/containerfiles/cluster-image/libvirtd.conf new file mode 100644 index 0000000..38425e9 --- /dev/null +++ b/containerfiles/cluster-image/libvirtd.conf @@ -0,0 +1,2 @@ +auth_tcp = "none" +log_outputs = "1:stderr" diff --git a/containerfiles/cluster-image/virtqemud-override.conf b/containerfiles/cluster-image/virtqemud-override.conf deleted file mode 100644 index bb5a674..0000000 --- a/containerfiles/cluster-image/virtqemud-override.conf +++ /dev/null @@ -1,2 +0,0 @@ -[Service] -Environment=VIRTQEMUD_ARGS="--timeout 0" diff --git a/containerfiles/cluster-image/virtqemud.conf b/containerfiles/cluster-image/virtqemud.conf deleted file mode 100644 index 3943281..0000000 --- a/containerfiles/cluster-image/virtqemud.conf +++ /dev/null @@ -1,3 +0,0 @@ -listen_tls = 0 -listen_tcp = 0 -log_outputs = "1:stderr" From 4eb3d8b572c38ab23dd303f7529a8e1aca41e7a3 Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Mon, 11 May 2026 12:50:26 +0000 Subject: [PATCH 2/8] Replace virt-install with libvirt Go bindings Use libvirt.org/go/libvirt and libvirt.org/go/libvirtxml to define and start VMs via direct TCP connection to libvirtd instead of shelling out to virt-install via podman exec. Domain configuration uses the functional option pattern with DomainOption closures composed at the call site. Assisted-by: Claude --- Containerfile | 1 + containerfiles/cluster-image/Containerfile | 1 - go.mod | 2 + go.sum | 4 + internal/config/defaults.go | 1 + internal/node/cleanup.go | 3 + internal/node/create.go | 122 ++++------- internal/node/node.go | 6 + internal/virsh/client.go | 124 ++++++----- internal/virsh/domain.go | 240 +++++++++++++++++++++ internal/virsh/types.go | 25 --- 11 files changed, 370 insertions(+), 159 deletions(-) create mode 100644 internal/virsh/domain.go diff --git a/Containerfile b/Containerfile index 3e7f36f..0ca18e7 100644 --- a/Containerfile +++ b/Containerfile @@ -9,6 +9,7 @@ RUN dnf install -y \ gpgme-devel \ btrfs-progs-devel \ device-mapper-devel \ + libvirt-devel \ && dnf clean all WORKDIR /build diff --git a/containerfiles/cluster-image/Containerfile b/containerfiles/cluster-image/Containerfile index d8a357c..aca6f0f 100644 --- a/containerfiles/cluster-image/Containerfile +++ b/containerfiles/cluster-image/Containerfile @@ -8,7 +8,6 @@ RUN dnf install -y --setopt=install_weak_deps=0 \ libvirt-daemon-driver-storage-core \ libvirt-daemon-driver-network \ qemu-kvm \ - virt-install \ virtiofsd \ passt \ iputils \ diff --git a/go.mod b/go.mod index 2c16728..10ace7a 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,8 @@ require ( k8s.io/api v0.35.0 k8s.io/apimachinery v0.35.0 k8s.io/client-go v0.35.0 + libvirt.org/go/libvirt v1.12003.0 + libvirt.org/go/libvirtxml v1.12002.0 sigs.k8s.io/yaml v1.6.0 ) diff --git a/go.sum b/go.sum index 05a75c3..615d876 100644 --- a/go.sum +++ b/go.sum @@ -648,6 +648,10 @@ k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZ k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +libvirt.org/go/libvirt v1.12003.0 h1:3ek4ObakscdShZRloa9s8/mGhK7xVduqNmAkb15ZEDQ= +libvirt.org/go/libvirt v1.12003.0/go.mod h1:1WiFE8EjZfq+FCVog+rvr1yatKbKZ9FaFMZgEqxEJqQ= +libvirt.org/go/libvirtxml v1.12002.0 h1:NbEHw+R3IZE0vZF1deCQt+6tA+6Io4pAw9RjS7tM4fs= +libvirt.org/go/libvirtxml v1.12002.0/go.mod h1:7Oq2BLDstLr/XtoQD8Fr3mfDNrzlI3utYKySXF2xkng= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/internal/config/defaults.go b/internal/config/defaults.go index 9ed2cb4..6400134 100644 --- a/internal/config/defaults.go +++ b/internal/config/defaults.go @@ -37,6 +37,7 @@ const ( ClusterMACPrefix = "52:54:01" DefaultAPIServerPort = 6443 + LibvirtTCPPort = 16509 ServiceCIDR = "10.96.0.0/12" CalicoVersion = "v3.27.0" diff --git a/internal/node/cleanup.go b/internal/node/cleanup.go index 0f687fd..53abef0 100644 --- a/internal/node/cleanup.go +++ b/internal/node/cleanup.go @@ -1,5 +1,8 @@ package node func (n *Node) Cleanup() error { + if n.virsh != nil { + return n.virsh.Close() + } return nil } diff --git a/internal/node/create.go b/internal/node/create.go index cb4b3b5..2620401 100644 --- a/internal/node/create.go +++ b/internal/node/create.go @@ -3,7 +3,6 @@ package node import ( "context" "fmt" - "time" "github.com/bootc-dev/bink/internal/config" "github.com/bootc-dev/bink/internal/podman" @@ -74,16 +73,21 @@ func (n *Node) createContainer(ctx context.Context) error { }, CapAdd: []string{"SYS_ADMIN"}, SelinuxOpts: []string{"disable"}, - } - - if n.IsControlPlane { - opts.PortMappings = []nettypes.PortMapping{ + PortMappings: []nettypes.PortMapping{ { - HostPort: uint16(n.APIPort), - ContainerPort: 6443, + HostPort: 0, + ContainerPort: uint16(config.LibvirtTCPPort), Protocol: "tcp", }, - } + }, + } + + if n.IsControlPlane { + opts.PortMappings = append(opts.PortMappings, nettypes.PortMapping{ + HostPort: uint16(n.APIPort), + ContainerPort: 6443, + Protocol: "tcp", + }) } containerID, err := n.podman.ContainerCreate(ctx, opts) @@ -174,89 +178,41 @@ func (n *Node) createOverlayDisk(ctx context.Context) error { return nil } -func (n *Node) waitForVirtqemud(ctx context.Context) error { - logrus.Debug("Waiting for virtqemud socket...") - for i := range 30 { - if err := ctx.Err(); err != nil { - return err - } - - err := n.podman.ContainerExecQuiet(ctx, n.ContainerName, - []string{"test", "-S", "/var/run/libvirt/virtqemud-sock"}) - if err == nil { - logrus.Debug("virtqemud socket is ready") - return nil - } - if i == 29 { - return fmt.Errorf("virtqemud socket not ready after 30s") - } - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(time.Second): - } - } - return nil -} func (n *Node) createVM(ctx context.Context) error { logrus.Infof("Creating VM %s", n.Name) - if err := n.waitForVirtqemud(ctx); err != nil { - return err - } - - overlayDisk := fmt.Sprintf("path=/workspace/%s.qcow2,format=qcow2,bus=virtio", n.Name) - isoPath := fmt.Sprintf("path=/workspace/%s-cloud-init.iso,device=cdrom", n.Name) - - maxMemory := n.MaxMemory - if maxMemory == 0 { - maxMemory = n.Memory + if n.Memory <= 0 || n.VCPUs <= 0 { + return fmt.Errorf("invalid VM configuration: memory=%d vcpus=%d (both must be positive)", n.Memory, n.VCPUs) } - opts := &virsh.VirtInstallOptions{ - Name: n.Name, - Memory: n.Memory, - MaxMemory: maxMemory, - VCPUs: n.VCPUs, - Disks: []string{overlayDisk, isoPath}, - Networks: []virsh.NetworkConfig{ - { - Type: "passt", - Model: "virtio", - PortForward: "2222:22", - }, - { - Type: "mcast", - Model: "virtio", - MAC: n.ClusterMAC, - }, - }, - Filesystems: []virsh.FilesystemConfig{ - { - Source: config.VirtiofsSharedDir, - Target: "cluster_images", - AccessMode: "passthrough", - ReadOnly: false, - }, - }, - XMLModifications: []string{ - "xpath.set=./devices/interface[2]/source/@address=" + config.MulticastAddr, - fmt.Sprintf("xpath.set=./devices/interface[2]/source/@port=%d", config.MulticastPort), - "xpath.set=./devices/filesystem/source/@socket=" + config.VirtiofsSocketPath, - }, + portForwards := []virsh.PortForward{ + {Start: 2222, To: 22}, } - if n.IsControlPlane { - opts.XMLModifications = append(opts.XMLModifications, - "xpath.create=./devices/interface[1]/portForward/range", - "xpath.set=./devices/interface[1]/portForward/range[2]/@start=6443", - "xpath.set=./devices/interface[1]/portForward/range[2]/@to=6443", - ) - } - - if err := n.virsh.VirtInstall(ctx, opts); err != nil { - return fmt.Errorf("creating VM with virt-install: %w", err) + portForwards = append(portForwards, virsh.PortForward{Start: 6443, To: 6443}) + } + + opts := []virsh.DomainOption{ + virsh.WithKVM(), + virsh.WithName(n.Name), + virsh.WithMemory(uint(n.Memory)), + virsh.WithVCPUs(uint(n.VCPUs)), + virsh.WithQ35OS(), + virsh.WithFeatures(), + virsh.WithCPUHostPassthrough(), + virsh.WithMemoryBackingForVirtiofs(), + virsh.WithDisk(fmt.Sprintf("/workspace/%s.qcow2", n.Name), "qcow2", "vda", "virtio"), + virsh.WithCDROM(fmt.Sprintf("/workspace/%s-cloud-init.iso", n.Name)), + virsh.WithPasstInterface(portForwards), + virsh.WithMcastInterface(n.ClusterMAC, config.MulticastAddr, config.MulticastPort), + virsh.WithVirtiofsSocket(config.VirtiofsSocketPath, "cluster_images"), + virsh.WithSerialConsole(), + virsh.WithGuestAgent(), + } + + if err := n.virsh.DefineAndStartDomain(ctx, opts...); err != nil { + return fmt.Errorf("creating VM: %w", err) } logrus.Infof("VM %s created with dual-NIC networking", n.Name) diff --git a/internal/node/node.go b/internal/node/node.go index 1c26391..a7c9dbf 100644 --- a/internal/node/node.go +++ b/internal/node/node.go @@ -167,6 +167,12 @@ func (n *Node) Create(ctx context.Context) error { return fmt.Errorf("creating container: %w", err) } + libvirtPort, err := n.podman.GetPublishedPort(ctx, n.ContainerName, fmt.Sprintf("%d/tcp", config.LibvirtTCPPort)) + if err != nil { + return fmt.Errorf("getting libvirt TCP port: %w", err) + } + n.virsh.SetLibvirtURI(fmt.Sprintf("qemu+tcp://localhost:%d/session", libvirtPort)) + if err := n.setupSSHKeys(ctx); err != nil { return fmt.Errorf("setting up SSH keys: %w", err) } diff --git a/internal/virsh/client.go b/internal/virsh/client.go index c80c313..919848a 100644 --- a/internal/virsh/client.go +++ b/internal/virsh/client.go @@ -4,14 +4,18 @@ import ( "context" "fmt" "strings" + "time" "github.com/bootc-dev/bink/internal/podman" "github.com/sirupsen/logrus" + libvirt "libvirt.org/go/libvirt" ) type Client struct { containerName string podmanClient *podman.Client + libvirtURI string + conn *libvirt.Connect } func NewClient(containerName string, podmanClient *podman.Client) *Client { @@ -21,76 +25,97 @@ func NewClient(containerName string, podmanClient *podman.Client) *Client { } } -func (c *Client) ExecInContainer(ctx context.Context, args ...string) (string, error) { - return c.podmanClient.ContainerExec(ctx, c.containerName, args) +func (c *Client) SetLibvirtURI(uri string) { + c.libvirtURI = uri } -func (c *Client) VirtInstall(ctx context.Context, opts *VirtInstallOptions) error { - var memArg string - if opts.MaxMemory > 0 && opts.MaxMemory > opts.Memory { - memArg = fmt.Sprintf("memory=%d,currentMemory=%d", opts.MaxMemory, opts.Memory) - } else { - memArg = fmt.Sprintf("%d", opts.Memory) +func (c *Client) connect(ctx context.Context) error { + if c.conn != nil { + alive, err := c.conn.IsAlive() + if err == nil && alive { + return nil + } + if _, err := c.conn.Close(); err != nil { + logrus.Debugf("Closing stale libvirt connection: %v", err) + } + c.conn = nil } - args := []string{ - "virt-install", - "--connect", "qemu:///session", - "--name", opts.Name, - "--memory", memArg, - "--vcpus", fmt.Sprintf("%d", opts.VCPUs), - "--import", - "--os-variant", "fedora-unknown", - "--graphics", "none", - "--console", "pty,target_type=serial", - "--noautoconsole", + if c.libvirtURI == "" { + return fmt.Errorf("libvirt URI not set") } - // Add shared memory support if filesystems are present (required for virtiofs) - if len(opts.Filesystems) > 0 { - args = append(args, "--memorybacking", "source.type=memfd,access.mode=shared") - } + var lastErr error + backoff := 500 * time.Millisecond + deadline := time.Now().Add(30 * time.Second) - for _, disk := range opts.Disks { - args = append(args, "--disk", disk) - } + for time.Now().Before(deadline) { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } - for _, network := range opts.Networks { - netArg := network.Type - if network.Model != "" { - netArg += fmt.Sprintf(",model=%s", network.Model) + conn, err := libvirt.NewConnect(c.libvirtURI) + if err == nil { + c.conn = conn + logrus.Debugf("Connected to libvirt at %s", c.libvirtURI) + return nil } - if network.MAC != "" { - netArg += fmt.Sprintf(",mac=%s", network.MAC) + lastErr = err + logrus.Debugf("Retrying libvirt connection to %s: %v", c.libvirtURI, err) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): } - if network.PortForward != "" { - netArg += fmt.Sprintf(",portForward=%s", network.PortForward) + if backoff < 5*time.Second { + backoff *= 2 } - args = append(args, "--network", netArg) } - for _, fs := range opts.Filesystems { - // Build filesystem argument for virt-install - // Explicitly specify virtiofs driver - fsArg := fmt.Sprintf("source.dir=%s,target.dir=%s,driver.type=virtiofs", - fs.Source, fs.Target) + return fmt.Errorf("connecting to libvirt at %s after 30s: %w", c.libvirtURI, lastErr) +} - if fs.ReadOnly { - fsArg += ",readonly=on" - } +func (c *Client) Close() error { + if c.conn != nil { + _, err := c.conn.Close() + c.conn = nil + return err + } + return nil +} - args = append(args, "--filesystem", fsArg) +func (c *Client) DefineAndStartDomain(ctx context.Context, opts ...DomainOption) error { + if err := c.connect(ctx); err != nil { + return fmt.Errorf("connecting to libvirt: %w", err) } - for _, xml := range opts.XMLModifications { - args = append(args, "--xml", xml) + xmlStr, err := MarshalDomainXML(opts...) + if err != nil { + return fmt.Errorf("building domain XML: %w", err) } - args = append(args, "--channel", "unix,target.type=virtio,target.name=org.qemu.guest_agent.0") + logrus.Debugf("Defining domain with XML:\n%s", xmlStr) + + dom, err := c.conn.DomainDefineXML(xmlStr) + if err != nil { + return fmt.Errorf("defining domain: %w", err) + } + defer dom.Free() - logrus.Debugf("Creating VM with virt-install: %s", strings.Join(args, " ")) + if err := dom.Create(); err != nil { + return fmt.Errorf("starting domain: %w", err) + } - return c.podmanClient.ContainerExecQuiet(ctx, c.containerName, args) + domain := NewDomain(opts...) + logrus.Infof("Domain %s defined and started via libvirt", domain.Name) + return nil +} + +func (c *Client) ExecInContainer(ctx context.Context, args ...string) (string, error) { + return c.podmanClient.ContainerExec(ctx, c.containerName, args) } func (c *Client) QemuImgCreate(ctx context.Context, opts *QemuImgCreateOptions) error { @@ -129,4 +154,3 @@ func (c *Client) Genisoimage(ctx context.Context, outputPath, volumeID string, f return c.podmanClient.ContainerExecQuiet(ctx, c.containerName, args) } - diff --git a/internal/virsh/domain.go b/internal/virsh/domain.go new file mode 100644 index 0000000..8a78985 --- /dev/null +++ b/internal/virsh/domain.go @@ -0,0 +1,240 @@ +package virsh + +import ( + "fmt" + + "libvirt.org/go/libvirtxml" +) + +type DomainOption func(d *libvirtxml.Domain) + +func NewDomain(opts ...DomainOption) *libvirtxml.Domain { + domain := &libvirtxml.Domain{} + for _, f := range opts { + f(domain) + } + return domain +} + +func allocateDevices(d *libvirtxml.Domain) { + if d.Devices == nil { + d.Devices = &libvirtxml.DomainDeviceList{} + } +} + +func WithKVM() DomainOption { + return func(d *libvirtxml.Domain) { + d.Type = "kvm" + } +} + +func WithName(name string) DomainOption { + return func(d *libvirtxml.Domain) { + d.Name = name + } +} + +func WithMemory(memory uint) DomainOption { + return func(d *libvirtxml.Domain) { + d.Memory = &libvirtxml.DomainMemory{ + Value: memory, + Unit: "MiB", + } + } +} + +func WithVCPUs(cpus uint) DomainOption { + return func(d *libvirtxml.Domain) { + d.VCPU = &libvirtxml.DomainVCPU{Value: cpus} + } +} + +func WithQ35OS() DomainOption { + return func(d *libvirtxml.Domain) { + d.OS = &libvirtxml.DomainOS{ + Type: &libvirtxml.DomainOSType{ + Arch: "x86_64", + Machine: "q35", + Type: "hvm", + }, + BootDevices: []libvirtxml.DomainBootDevice{ + {Dev: "hd"}, + }, + } + } +} + +func WithFeatures() DomainOption { + return func(d *libvirtxml.Domain) { + d.Features = &libvirtxml.DomainFeatureList{ + ACPI: &libvirtxml.DomainFeature{}, + APIC: &libvirtxml.DomainFeatureAPIC{}, + } + } +} + +func WithCPUHostPassthrough() DomainOption { + return func(d *libvirtxml.Domain) { + d.CPU = &libvirtxml.DomainCPU{ + Mode: "host-passthrough", + } + } +} + +func WithMemoryBackingForVirtiofs() DomainOption { + return func(d *libvirtxml.Domain) { + d.MemoryBacking = &libvirtxml.DomainMemoryBacking{ + MemorySource: &libvirtxml.DomainMemorySource{Type: "memfd"}, + MemoryAccess: &libvirtxml.DomainMemoryAccess{Mode: "shared"}, + } + } +} + +func WithDisk(path, format, dev, bus string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Disks = append(d.Devices.Disks, libvirtxml.DomainDisk{ + Device: "disk", + Driver: &libvirtxml.DomainDiskDriver{ + Name: "qemu", + Type: format, + }, + Source: &libvirtxml.DomainDiskSource{ + File: &libvirtxml.DomainDiskSourceFile{File: path}, + }, + Target: &libvirtxml.DomainDiskTarget{ + Dev: dev, + Bus: bus, + }, + }) + } +} + +func WithCDROM(path string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Disks = append(d.Devices.Disks, libvirtxml.DomainDisk{ + Device: "cdrom", + Driver: &libvirtxml.DomainDiskDriver{ + Name: "qemu", + Type: "raw", + }, + Source: &libvirtxml.DomainDiskSource{ + File: &libvirtxml.DomainDiskSourceFile{File: path}, + }, + Target: &libvirtxml.DomainDiskTarget{ + Dev: "sda", + Bus: "sata", + }, + ReadOnly: &libvirtxml.DomainDiskReadOnly{}, + }) + } +} + +type PortForward struct { + Start int + To int +} + +func WithPasstInterface(portForwards []PortForward) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + iface := libvirtxml.DomainInterface{ + Source: &libvirtxml.DomainInterfaceSource{ + User: &libvirtxml.DomainInterfaceSourceUser{}, + }, + Model: &libvirtxml.DomainInterfaceModel{Type: "virtio"}, + Backend: &libvirtxml.DomainInterfaceBackend{Type: "passt"}, + } + fwd := libvirtxml.DomainInterfaceSourcePortForward{Proto: "tcp"} + for _, pf := range portForwards { + fwd.Ranges = append(fwd.Ranges, libvirtxml.DomainInterfaceSourcePortForwardRange{ + Start: uint(pf.Start), + To: uint(pf.To), + }) + } + if len(fwd.Ranges) > 0 { + iface.PortForward = []libvirtxml.DomainInterfaceSourcePortForward{fwd} + } + d.Devices.Interfaces = append(d.Devices.Interfaces, iface) + } +} + +func WithMcastInterface(mac, addr string, port int) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Interfaces = append(d.Devices.Interfaces, libvirtxml.DomainInterface{ + MAC: &libvirtxml.DomainInterfaceMAC{Address: mac}, + Model: &libvirtxml.DomainInterfaceModel{Type: "virtio"}, + Source: &libvirtxml.DomainInterfaceSource{ + MCast: &libvirtxml.DomainInterfaceSourceMCast{ + Address: addr, + Port: uint(port), + }, + }, + }) + } +} + +func WithVirtiofsSocket(socketPath, target string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Filesystems = append(d.Devices.Filesystems, libvirtxml.DomainFilesystem{ + Driver: &libvirtxml.DomainFilesystemDriver{Type: "virtiofs"}, + Source: &libvirtxml.DomainFilesystemSource{ + Mount: &libvirtxml.DomainFilesystemSourceMount{Socket: socketPath}, + }, + Target: &libvirtxml.DomainFilesystemTarget{Dir: target}, + }) + } +} + +func WithSerialConsole() DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + port0 := uint(0) + d.Devices.Serials = append(d.Devices.Serials, libvirtxml.DomainSerial{ + Source: &libvirtxml.DomainChardevSource{ + Pty: &libvirtxml.DomainChardevSourcePty{}, + }, + Target: &libvirtxml.DomainSerialTarget{ + Type: "isa-serial", + Port: &port0, + }, + }) + d.Devices.Consoles = append(d.Devices.Consoles, libvirtxml.DomainConsole{ + Source: &libvirtxml.DomainChardevSource{ + Pty: &libvirtxml.DomainChardevSourcePty{}, + }, + Target: &libvirtxml.DomainConsoleTarget{ + Type: "serial", + Port: &port0, + }, + }) + } +} + +func WithGuestAgent() DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Channels = append(d.Devices.Channels, libvirtxml.DomainChannel{ + Source: &libvirtxml.DomainChardevSource{ + UNIX: &libvirtxml.DomainChardevSourceUNIX{}, + }, + Target: &libvirtxml.DomainChannelTarget{ + VirtIO: &libvirtxml.DomainChannelTargetVirtIO{ + Name: "org.qemu.guest_agent.0", + }, + }, + }) + } +} + +func MarshalDomainXML(opts ...DomainOption) (string, error) { + domain := NewDomain(opts...) + xml, err := domain.Marshal() + if err != nil { + return "", fmt.Errorf("marshaling domain XML: %w", err) + } + return xml, nil +} diff --git a/internal/virsh/types.go b/internal/virsh/types.go index ebb2d13..b1c8511 100644 --- a/internal/virsh/types.go +++ b/internal/virsh/types.go @@ -1,30 +1,5 @@ package virsh -type VirtInstallOptions struct { - Name string - Memory int - MaxMemory int - VCPUs int - Disks []string - Networks []NetworkConfig - Filesystems []FilesystemConfig - XMLModifications []string -} - -type FilesystemConfig struct { - Source string // Host path (in container) - Target string // Mount tag name for guest - AccessMode string // mapped, passthrough, squash (default: passthrough) - ReadOnly bool -} - -type NetworkConfig struct { - Type string - Model string - MAC string - PortForward string -} - type QemuImgCreateOptions struct { Path string Format string From 2b9403824ee33f15edd61a9c94e468f9c92dc01a Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Tue, 19 May 2026 12:08:47 +0000 Subject: [PATCH 3/8] Mask all modular libvirt sockets to fix monolithic libvirtd startup The modular daemon sockets (virtnetworkd, virtstoraged) declare Conflicts=libvirtd.socket, preventing the monolithic libvirtd-tcp.socket from starting at boot. Assisted-by: Claude Opus 4.6 (1M context) --- containerfiles/cluster-image/Containerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/containerfiles/cluster-image/Containerfile b/containerfiles/cluster-image/Containerfile index aca6f0f..d630775 100644 --- a/containerfiles/cluster-image/Containerfile +++ b/containerfiles/cluster-image/Containerfile @@ -31,8 +31,11 @@ RUN echo 'root:100000:65536' > /etc/subuid && \ echo 'root:100000:65536' > /etc/subgid RUN systemctl enable libvirtd.socket libvirtd-tcp.socket \ virtlogd.service virtiofsd.service && \ - systemctl mask virtqemud.service virtqemud.socket \ + systemctl mask \ + virtqemud.service virtqemud.socket virtqemud-ro.socket virtqemud-admin.socket \ virtproxyd.service virtproxyd.socket virtproxyd-ro.socket virtproxyd-admin.socket \ + virtnetworkd.service virtnetworkd.socket virtnetworkd-ro.socket virtnetworkd-admin.socket \ + virtstoraged.service virtstoraged.socket virtstoraged-ro.socket virtstoraged-admin.socket \ systemd-logind.service getty.target console-getty.service STOPSIGNAL SIGRTMIN+3 From fecb176205b30006a397a5a78c80e1d1c380fa2f Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Tue, 19 May 2026 12:45:25 +0000 Subject: [PATCH 4/8] Add libvirt-dev to CI unit test dependencies Assisted-by: Claude Opus 4.6 (1M context) --- .github/workflows/integration-tests.yml | 7 +++++-- .github/workflows/unit-tests.yml | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index aa4b18a..8cd0287 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -55,6 +55,7 @@ jobs: libgpgme-dev \ libbtrfs-dev \ libdevmapper-dev \ + libvirt-dev \ pkg-config - name: Configure Podman @@ -71,7 +72,7 @@ jobs: sudo podman info --format '{{.Store.GraphRoot}}' - name: Build bink binary - run: sudo make build-bink + run: make build-bink - name: Verify prerequisites run: | @@ -80,9 +81,11 @@ jobs: df -h / free -h + - name: Build cluster image from branch + run: sudo make build-cluster-image + - name: Pre-pull container images run: | - sudo podman pull ghcr.io/alicefr/bink/cluster:latest sudo podman pull ghcr.io/alicefr/bink/node:v1.35-fedora-44-disk sudo podman pull ghcr.io/alicefr/bink/dns:latest sudo podman pull docker.io/library/registry:2 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index ce8a91f..9668ca8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -32,6 +32,7 @@ jobs: libgpgme-dev \ libbtrfs-dev \ libdevmapper-dev \ + libvirt-dev \ pkg-config - name: Run unit tests From bfdc933e8dd87408fd333a2e973df32f70e1ccd3 Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Tue, 19 May 2026 14:09:25 +0000 Subject: [PATCH 5/8] Avoid redundant domain construction in DefineAndStartDomain Assisted-by: Claude Opus 4.6 (1M context) --- internal/virsh/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/virsh/client.go b/internal/virsh/client.go index 919848a..1de7050 100644 --- a/internal/virsh/client.go +++ b/internal/virsh/client.go @@ -92,7 +92,8 @@ func (c *Client) DefineAndStartDomain(ctx context.Context, opts ...DomainOption) return fmt.Errorf("connecting to libvirt: %w", err) } - xmlStr, err := MarshalDomainXML(opts...) + domain := NewDomain(opts...) + xmlStr, err := domain.Marshal() if err != nil { return fmt.Errorf("building domain XML: %w", err) } @@ -109,7 +110,6 @@ func (c *Client) DefineAndStartDomain(ctx context.Context, opts ...DomainOption) return fmt.Errorf("starting domain: %w", err) } - domain := NewDomain(opts...) logrus.Infof("Domain %s defined and started via libvirt", domain.Name) return nil } From dc8a100b41b4d49fb315be1c1cdd1180b6de6b8d Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Tue, 19 May 2026 14:43:40 +0000 Subject: [PATCH 6/8] Update ARCHITECTURE.md for monolithic libvirtd and Go bindings Assisted-by: Claude Opus 4.6 (1M context) --- ARCHITECTURE.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 3f1189f..50984a1 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -59,13 +59,13 @@ Bink manages multi-node Kubernetes clusters where each node runs as a rootless P Each Kubernetes node is a Podman container running the `localhost/cluster:latest` image (Fedora 43 with libvirt, QEMU, and virtiofsd). Containers are named `k8s--` (e.g., `k8s-dev-node1`) and labeled with `bink.cluster-name` and `bink.node-name` for discovery. -The container runs four libvirt daemons (`virtlogd`, `virtstoraged`, `virtnetworkd`, `virtqemud`) and a `virtiofsd` instance. It requires `/dev/kvm` for hardware virtualization and `/dev/fuse` for virtiofs, plus `SYS_ADMIN` capability. SELinux is disabled inside the container. +The container runs the monolithic `libvirtd` daemon (with TCP socket on port 16509 for the Go bindings to connect) along with `virtlogd` and a `virtiofsd` instance. All modular libvirt daemons (`virtqemud`, `virtproxyd`, `virtnetworkd`, `virtstoraged`) are masked to avoid conflicts with the monolithic daemon. It requires `/dev/kvm` for hardware virtualization and `/dev/fuse` for virtiofs, plus `SYS_ADMIN` capability. SELinux is disabled inside the container. -Control-plane containers publish port 6443 to a random host port for API access within the cluster. External API access from the host goes through the HAProxy load balancer (see below). +All containers publish the libvirt TCP port (16509) to a random host port so bink can connect to libvirtd from the host via the Go bindings. Control-plane containers additionally publish port 6443 for API access within the cluster. External API access from the host goes through the HAProxy load balancer (see below). ### Virtual Machine -Inside each container, a Fedora bootc VM runs via libvirt/QEMU. The VM boots from a qcow2 overlay disk backed by a shared read-only base image (`fedora-bootc-k8s.qcow2`). Cloud-init configures the VM on first boot: hostname, networking, SSH keys, CRI-O, kubelet, and kernel parameters. +Inside each container, a Fedora bootc VM is defined and started using the libvirt Go bindings (`libvirt.org/go/libvirt` and `libvirt.org/go/libvirtxml`). Bink connects to the monolithic libvirtd via `qemu+tcp://localhost:/session`, constructs the domain XML programmatically, and calls `DomainDefineXML` + `Domain.Create`. The VM boots from a qcow2 overlay disk backed by a shared read-only base image (`fedora-bootc-k8s.qcow2`). Cloud-init configures the VM on first boot: hostname, networking, SSH keys, CRI-O, kubelet, and kernel parameters. The VM runs: - **CRI-O** as the container runtime @@ -208,9 +208,9 @@ A cluster starts with a single control-plane node (`node1`) and can grow by addi 1. Create the Podman bridge network for the cluster 2. Create the `cluster-keys` volume and generate an SSH key pair (RSA 4096-bit) 3. Ensure the global `cluster-images` volume is populated -4. Create the node1 container with libvirt daemons +4. Create the node1 container with monolithic libvirtd 5. Create a qcow2 overlay disk and a cloud-init ISO -6. Boot the VM via virt-install with dual NICs and virtiofs +6. Define and start the VM via libvirt Go bindings (`libvirt.org/go/libvirt`) with dual NICs and virtiofs 7. Wait for cloud-init to complete (configures networking, CRI-O, kubelet) 8. Run `kubeadm init` with the node's cluster IP as the advertise address 9. Install Calico CNI and patch CoreDNS for CRI-O compatibility From faec50841bd85b5893bf9eb33a4ac591d606ebfe Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Tue, 19 May 2026 14:50:14 +0000 Subject: [PATCH 7/8] Add libvirt-libs to runtime container image Assisted-by: Claude Opus 4.6 (1M context) --- Containerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Containerfile b/Containerfile index 0ca18e7..bc44692 100644 --- a/Containerfile +++ b/Containerfile @@ -33,6 +33,7 @@ RUN dnf install -y \ gpgme \ podman \ kubernetes-client \ + libvirt-libs \ && dnf clean all COPY --from=builder /output/bink /usr/local/bin/bink From a7aa564d7e36ff8755257f9eaae3d1e7e9d3e2ea Mon Sep 17 00:00:00 2001 From: Alice Frosi Date: Wed, 20 May 2026 07:55:55 +0000 Subject: [PATCH 8/8] Build and pre-load cluster image in CI test The switch to libvirtd with TCP socket activation requires the cluster image to be rebuilt. Build it in CI and load it into the nested container to avoid pulling a stale image from the registry. Assisted-by: Claude Opus 4.6 (1M context) --- .github/workflows/test-container-image.yml | 3 +++ hack/test-container-image.sh | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/test-container-image.yml b/.github/workflows/test-container-image.yml index 7dd013a..8f305fd 100644 --- a/.github/workflows/test-container-image.yml +++ b/.github/workflows/test-container-image.yml @@ -55,6 +55,9 @@ jobs: - name: Build bink image run: sudo make build-bink-image + - name: Build cluster image + run: sudo make build-cluster-image + - name: Test nested mode run: sudo hack/test-container-image.sh nested timeout-minutes: 40 diff --git a/hack/test-container-image.sh b/hack/test-container-image.sh index 424e3fd..2c87bda 100755 --- a/hack/test-container-image.sh +++ b/hack/test-container-image.sh @@ -2,6 +2,7 @@ set -euo pipefail BINK_IMAGE="${BINK_IMAGE:-ghcr.io/alicefr/bink/bink:latest}" +CLUSTER_IMAGE="${CLUSTER_IMAGE:-ghcr.io/alicefr/bink/cluster:latest}" if [ -n "${CONTAINER_HOST:-}" ]; then PODMAN_SOCK="${CONTAINER_HOST#unix://}" elif [ -S "/run/podman/podman.sock" ]; then @@ -56,6 +57,12 @@ run_test() { # unreachable from inside nested podman networks. Override it so inner aardvark-dns # forwards queries to a public resolver instead. podman exec "${nested_container}" bash -c 'echo "nameserver 8.8.8.8" > /etc/resolv.conf' + # Pre-load locally-built images into the nested container to avoid + # pulling from the registry (which may also be stale). + if podman image exists "${CLUSTER_IMAGE}" 2>/dev/null; then + echo "Loading ${CLUSTER_IMAGE} into nested container..." + podman save "${CLUSTER_IMAGE}" | podman exec -i "${nested_container}" podman load + fi bink_args=(podman exec "${nested_container}" bink) ;; *)