diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index aa4b18a..8cd0287 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -55,6 +55,7 @@ jobs: libgpgme-dev \ libbtrfs-dev \ libdevmapper-dev \ + libvirt-dev \ pkg-config - name: Configure Podman @@ -71,7 +72,7 @@ jobs: sudo podman info --format '{{.Store.GraphRoot}}' - name: Build bink binary - run: sudo make build-bink + run: make build-bink - name: Verify prerequisites run: | @@ -80,9 +81,11 @@ jobs: df -h / free -h + - name: Build cluster image from branch + run: sudo make build-cluster-image + - name: Pre-pull container images run: | - sudo podman pull ghcr.io/alicefr/bink/cluster:latest sudo podman pull ghcr.io/alicefr/bink/node:v1.35-fedora-44-disk sudo podman pull ghcr.io/alicefr/bink/dns:latest sudo podman pull docker.io/library/registry:2 diff --git a/.github/workflows/test-container-image.yml b/.github/workflows/test-container-image.yml index 7dd013a..8f305fd 100644 --- a/.github/workflows/test-container-image.yml +++ b/.github/workflows/test-container-image.yml @@ -55,6 +55,9 @@ jobs: - name: Build bink image run: sudo make build-bink-image + - name: Build cluster image + run: sudo make build-cluster-image + - name: Test nested mode run: sudo hack/test-container-image.sh nested timeout-minutes: 40 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index ce8a91f..9668ca8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -32,6 +32,7 @@ jobs: libgpgme-dev \ libbtrfs-dev \ libdevmapper-dev \ + libvirt-dev \ pkg-config - name: Run unit tests diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 3f1189f..50984a1 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -59,13 +59,13 @@ Bink manages multi-node Kubernetes clusters where each node runs as a rootless P Each Kubernetes node is a Podman container running the `localhost/cluster:latest` image 
(Fedora 43 with libvirt, QEMU, and virtiofsd). Containers are named `k8s-<cluster>-<node>` (e.g., `k8s-dev-node1`) and labeled with `bink.cluster-name` and `bink.node-name` for discovery. -The container runs four libvirt daemons (`virtlogd`, `virtstoraged`, `virtnetworkd`, `virtqemud`) and a `virtiofsd` instance. It requires `/dev/kvm` for hardware virtualization and `/dev/fuse` for virtiofs, plus `SYS_ADMIN` capability. SELinux is disabled inside the container. +The container runs the monolithic `libvirtd` daemon (with TCP socket on port 16509 for the Go bindings to connect) along with `virtlogd` and a `virtiofsd` instance. All modular libvirt daemons (`virtqemud`, `virtproxyd`, `virtnetworkd`, `virtstoraged`) are masked to avoid conflicts with the monolithic daemon. It requires `/dev/kvm` for hardware virtualization and `/dev/fuse` for virtiofs, plus `SYS_ADMIN` capability. SELinux is disabled inside the container. -Control-plane containers publish port 6443 to a random host port for API access within the cluster. External API access from the host goes through the HAProxy load balancer (see below). +All containers publish the libvirt TCP port (16509) to a random host port so bink can connect to libvirtd from the host via the Go bindings. Control-plane containers additionally publish port 6443 for API access within the cluster. External API access from the host goes through the HAProxy load balancer (see below). ### Virtual Machine -Inside each container, a Fedora bootc VM runs via libvirt/QEMU. The VM boots from a qcow2 overlay disk backed by a shared read-only base image (`fedora-bootc-k8s.qcow2`). Cloud-init configures the VM on first boot: hostname, networking, SSH keys, CRI-O, kubelet, and kernel parameters. +Inside each container, a Fedora bootc VM is defined and started using the libvirt Go bindings (`libvirt.org/go/libvirt` and `libvirt.org/go/libvirtxml`). 
Bink connects to the monolithic libvirtd via `qemu+tcp://localhost:<port>/session`, constructs the domain XML programmatically, and calls `DomainDefineXML` + `Domain.Create`. The VM boots from a qcow2 overlay disk backed by a shared read-only base image (`fedora-bootc-k8s.qcow2`). Cloud-init configures the VM on first boot: hostname, networking, SSH keys, CRI-O, kubelet, and kernel parameters. The VM runs: - **CRI-O** as the container runtime @@ -208,9 +208,9 @@ A cluster starts with a single control-plane node (`node1`) and can grow by addi 1. Create the Podman bridge network for the cluster 2. Create the `cluster-keys` volume and generate an SSH key pair (RSA 4096-bit) 3. Ensure the global `cluster-images` volume is populated -4. Create the node1 container with libvirt daemons +4. Create the node1 container with monolithic libvirtd 5. Create a qcow2 overlay disk and a cloud-init ISO -6. Boot the VM via virt-install with dual NICs and virtiofs +6. Define and start the VM via libvirt Go bindings (`libvirt.org/go/libvirt`) with dual NICs and virtiofs 7. Wait for cloud-init to complete (configures networking, CRI-O, kubelet) 8. Run `kubeadm init` with the node's cluster IP as the advertise address 9. 
Install Calico CNI and patch CoreDNS for CRI-O compatibility diff --git a/Containerfile b/Containerfile index 3e7f36f..bc44692 100644 --- a/Containerfile +++ b/Containerfile @@ -9,6 +9,7 @@ RUN dnf install -y \ gpgme-devel \ btrfs-progs-devel \ device-mapper-devel \ + libvirt-devel \ && dnf clean all WORKDIR /build @@ -32,6 +33,7 @@ RUN dnf install -y \ gpgme \ podman \ kubernetes-client \ + libvirt-libs \ && dnf clean all COPY --from=builder /output/bink /usr/local/bin/bink diff --git a/containerfiles/cluster-image/Containerfile b/containerfiles/cluster-image/Containerfile index 83af529..d630775 100644 --- a/containerfiles/cluster-image/Containerfile +++ b/containerfiles/cluster-image/Containerfile @@ -8,7 +8,6 @@ RUN dnf install -y --setopt=install_weak_deps=0 \ libvirt-daemon-driver-storage-core \ libvirt-daemon-driver-network \ qemu-kvm \ - virt-install \ virtiofsd \ passt \ iputils \ @@ -19,20 +18,25 @@ RUN dnf install -y --setopt=install_weak_deps=0 \ && dnf clean all COPY qemu.conf /etc/libvirt/qemu.conf -COPY virtqemud.conf /etc/libvirt/virtqemud.conf +COPY libvirtd.conf /etc/libvirt/libvirtd.conf COPY virtiofsd-wrapper /usr/local/bin/virtiofsd-wrapper COPY virtiofsd.service /etc/systemd/system/virtiofsd.service -RUN mkdir -p /etc/systemd/system/virtqemud.service.d -COPY virtqemud-override.conf /etc/systemd/system/virtqemud.service.d/override.conf +RUN mkdir -p /etc/systemd/system/libvirtd.service.d +COPY libvirtd-override.conf /etc/systemd/system/libvirtd.service.d/override.conf RUN chmod +x /usr/local/bin/virtiofsd-wrapper RUN mkdir -p /home/qemu && chown -R qemu:qemu /home/qemu RUN echo 'root:100000:65536' > /etc/subuid && \ echo 'root:100000:65536' > /etc/subgid -RUN systemctl enable virtqemud.service virtlogd.service virtstoraged.service \ - virtnetworkd.service virtiofsd.service && \ - systemctl mask systemd-logind.service getty.target console-getty.service +RUN systemctl enable libvirtd.socket libvirtd-tcp.socket \ + virtlogd.service 
virtiofsd.service && \ + systemctl mask \ + virtqemud.service virtqemud.socket virtqemud-ro.socket virtqemud-admin.socket \ + virtproxyd.service virtproxyd.socket virtproxyd-ro.socket virtproxyd-admin.socket \ + virtnetworkd.service virtnetworkd.socket virtnetworkd-ro.socket virtnetworkd-admin.socket \ + virtstoraged.service virtstoraged.socket virtstoraged-ro.socket virtstoraged-admin.socket \ + systemd-logind.service getty.target console-getty.service STOPSIGNAL SIGRTMIN+3 ENTRYPOINT ["/sbin/init"] diff --git a/containerfiles/cluster-image/libvirtd-override.conf b/containerfiles/cluster-image/libvirtd-override.conf new file mode 100644 index 0000000..a6d8221 --- /dev/null +++ b/containerfiles/cluster-image/libvirtd-override.conf @@ -0,0 +1,2 @@ +[Service] +Environment=LIBVIRTD_ARGS="--timeout 0" diff --git a/containerfiles/cluster-image/libvirtd.conf b/containerfiles/cluster-image/libvirtd.conf new file mode 100644 index 0000000..38425e9 --- /dev/null +++ b/containerfiles/cluster-image/libvirtd.conf @@ -0,0 +1,2 @@ +auth_tcp = "none" +log_outputs = "1:stderr" diff --git a/containerfiles/cluster-image/virtqemud-override.conf b/containerfiles/cluster-image/virtqemud-override.conf deleted file mode 100644 index bb5a674..0000000 --- a/containerfiles/cluster-image/virtqemud-override.conf +++ /dev/null @@ -1,2 +0,0 @@ -[Service] -Environment=VIRTQEMUD_ARGS="--timeout 0" diff --git a/containerfiles/cluster-image/virtqemud.conf b/containerfiles/cluster-image/virtqemud.conf deleted file mode 100644 index 3943281..0000000 --- a/containerfiles/cluster-image/virtqemud.conf +++ /dev/null @@ -1,3 +0,0 @@ -listen_tls = 0 -listen_tcp = 0 -log_outputs = "1:stderr" diff --git a/go.mod b/go.mod index 2c16728..10ace7a 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,8 @@ require ( k8s.io/api v0.35.0 k8s.io/apimachinery v0.35.0 k8s.io/client-go v0.35.0 + libvirt.org/go/libvirt v1.12003.0 + libvirt.org/go/libvirtxml v1.12002.0 sigs.k8s.io/yaml v1.6.0 ) diff --git a/go.sum b/go.sum index 
05a75c3..615d876 100644 --- a/go.sum +++ b/go.sum @@ -648,6 +648,10 @@ k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZ k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +libvirt.org/go/libvirt v1.12003.0 h1:3ek4ObakscdShZRloa9s8/mGhK7xVduqNmAkb15ZEDQ= +libvirt.org/go/libvirt v1.12003.0/go.mod h1:1WiFE8EjZfq+FCVog+rvr1yatKbKZ9FaFMZgEqxEJqQ= +libvirt.org/go/libvirtxml v1.12002.0 h1:NbEHw+R3IZE0vZF1deCQt+6tA+6Io4pAw9RjS7tM4fs= +libvirt.org/go/libvirtxml v1.12002.0/go.mod h1:7Oq2BLDstLr/XtoQD8Fr3mfDNrzlI3utYKySXF2xkng= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/hack/test-container-image.sh b/hack/test-container-image.sh index 424e3fd..2c87bda 100755 --- a/hack/test-container-image.sh +++ b/hack/test-container-image.sh @@ -2,6 +2,7 @@ set -euo pipefail BINK_IMAGE="${BINK_IMAGE:-ghcr.io/alicefr/bink/bink:latest}" +CLUSTER_IMAGE="${CLUSTER_IMAGE:-ghcr.io/alicefr/bink/cluster:latest}" if [ -n "${CONTAINER_HOST:-}" ]; then PODMAN_SOCK="${CONTAINER_HOST#unix://}" elif [ -S "/run/podman/podman.sock" ]; then @@ -56,6 +57,12 @@ run_test() { # unreachable from inside nested podman networks. Override it so inner aardvark-dns # forwards queries to a public resolver instead. podman exec "${nested_container}" bash -c 'echo "nameserver 8.8.8.8" > /etc/resolv.conf' + # Pre-load locally-built images into the nested container to avoid + # pulling from the registry (which may also be stale). 
+ if podman image exists "${CLUSTER_IMAGE}" 2>/dev/null; then + echo "Loading ${CLUSTER_IMAGE} into nested container..." + podman save "${CLUSTER_IMAGE}" | podman exec -i "${nested_container}" podman load + fi bink_args=(podman exec "${nested_container}" bink) ;; *) diff --git a/internal/config/defaults.go b/internal/config/defaults.go index 9ed2cb4..6400134 100644 --- a/internal/config/defaults.go +++ b/internal/config/defaults.go @@ -37,6 +37,7 @@ const ( ClusterMACPrefix = "52:54:01" DefaultAPIServerPort = 6443 + LibvirtTCPPort = 16509 ServiceCIDR = "10.96.0.0/12" CalicoVersion = "v3.27.0" diff --git a/internal/node/cleanup.go b/internal/node/cleanup.go index 0f687fd..53abef0 100644 --- a/internal/node/cleanup.go +++ b/internal/node/cleanup.go @@ -1,5 +1,8 @@ package node func (n *Node) Cleanup() error { + if n.virsh != nil { + return n.virsh.Close() + } return nil } diff --git a/internal/node/create.go b/internal/node/create.go index cb4b3b5..2620401 100644 --- a/internal/node/create.go +++ b/internal/node/create.go @@ -3,7 +3,6 @@ package node import ( "context" "fmt" - "time" "github.com/bootc-dev/bink/internal/config" "github.com/bootc-dev/bink/internal/podman" @@ -74,16 +73,21 @@ func (n *Node) createContainer(ctx context.Context) error { }, CapAdd: []string{"SYS_ADMIN"}, SelinuxOpts: []string{"disable"}, - } - - if n.IsControlPlane { - opts.PortMappings = []nettypes.PortMapping{ + PortMappings: []nettypes.PortMapping{ { - HostPort: uint16(n.APIPort), - ContainerPort: 6443, + HostPort: 0, + ContainerPort: uint16(config.LibvirtTCPPort), Protocol: "tcp", }, - } + }, + } + + if n.IsControlPlane { + opts.PortMappings = append(opts.PortMappings, nettypes.PortMapping{ + HostPort: uint16(n.APIPort), + ContainerPort: 6443, + Protocol: "tcp", + }) } containerID, err := n.podman.ContainerCreate(ctx, opts) @@ -174,89 +178,41 @@ func (n *Node) createOverlayDisk(ctx context.Context) error { return nil } -func (n *Node) waitForVirtqemud(ctx context.Context) error { - 
logrus.Debug("Waiting for virtqemud socket...") - for i := range 30 { - if err := ctx.Err(); err != nil { - return err - } - - err := n.podman.ContainerExecQuiet(ctx, n.ContainerName, - []string{"test", "-S", "/var/run/libvirt/virtqemud-sock"}) - if err == nil { - logrus.Debug("virtqemud socket is ready") - return nil - } - if i == 29 { - return fmt.Errorf("virtqemud socket not ready after 30s") - } - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(time.Second): - } - } - return nil -} func (n *Node) createVM(ctx context.Context) error { logrus.Infof("Creating VM %s", n.Name) - if err := n.waitForVirtqemud(ctx); err != nil { - return err - } - - overlayDisk := fmt.Sprintf("path=/workspace/%s.qcow2,format=qcow2,bus=virtio", n.Name) - isoPath := fmt.Sprintf("path=/workspace/%s-cloud-init.iso,device=cdrom", n.Name) - - maxMemory := n.MaxMemory - if maxMemory == 0 { - maxMemory = n.Memory + if n.Memory <= 0 || n.VCPUs <= 0 { + return fmt.Errorf("invalid VM configuration: memory=%d vcpus=%d (both must be positive)", n.Memory, n.VCPUs) } - opts := &virsh.VirtInstallOptions{ - Name: n.Name, - Memory: n.Memory, - MaxMemory: maxMemory, - VCPUs: n.VCPUs, - Disks: []string{overlayDisk, isoPath}, - Networks: []virsh.NetworkConfig{ - { - Type: "passt", - Model: "virtio", - PortForward: "2222:22", - }, - { - Type: "mcast", - Model: "virtio", - MAC: n.ClusterMAC, - }, - }, - Filesystems: []virsh.FilesystemConfig{ - { - Source: config.VirtiofsSharedDir, - Target: "cluster_images", - AccessMode: "passthrough", - ReadOnly: false, - }, - }, - XMLModifications: []string{ - "xpath.set=./devices/interface[2]/source/@address=" + config.MulticastAddr, - fmt.Sprintf("xpath.set=./devices/interface[2]/source/@port=%d", config.MulticastPort), - "xpath.set=./devices/filesystem/source/@socket=" + config.VirtiofsSocketPath, - }, + portForwards := []virsh.PortForward{ + {Start: 2222, To: 22}, } - if n.IsControlPlane { - opts.XMLModifications = append(opts.XMLModifications, - 
"xpath.create=./devices/interface[1]/portForward/range", - "xpath.set=./devices/interface[1]/portForward/range[2]/@start=6443", - "xpath.set=./devices/interface[1]/portForward/range[2]/@to=6443", - ) - } - - if err := n.virsh.VirtInstall(ctx, opts); err != nil { - return fmt.Errorf("creating VM with virt-install: %w", err) + portForwards = append(portForwards, virsh.PortForward{Start: 6443, To: 6443}) + } + + opts := []virsh.DomainOption{ + virsh.WithKVM(), + virsh.WithName(n.Name), + virsh.WithMemory(uint(n.Memory)), + virsh.WithVCPUs(uint(n.VCPUs)), + virsh.WithQ35OS(), + virsh.WithFeatures(), + virsh.WithCPUHostPassthrough(), + virsh.WithMemoryBackingForVirtiofs(), + virsh.WithDisk(fmt.Sprintf("/workspace/%s.qcow2", n.Name), "qcow2", "vda", "virtio"), + virsh.WithCDROM(fmt.Sprintf("/workspace/%s-cloud-init.iso", n.Name)), + virsh.WithPasstInterface(portForwards), + virsh.WithMcastInterface(n.ClusterMAC, config.MulticastAddr, config.MulticastPort), + virsh.WithVirtiofsSocket(config.VirtiofsSocketPath, "cluster_images"), + virsh.WithSerialConsole(), + virsh.WithGuestAgent(), + } + + if err := n.virsh.DefineAndStartDomain(ctx, opts...); err != nil { + return fmt.Errorf("creating VM: %w", err) } logrus.Infof("VM %s created with dual-NIC networking", n.Name) diff --git a/internal/node/node.go b/internal/node/node.go index 1c26391..a7c9dbf 100644 --- a/internal/node/node.go +++ b/internal/node/node.go @@ -167,6 +167,12 @@ func (n *Node) Create(ctx context.Context) error { return fmt.Errorf("creating container: %w", err) } + libvirtPort, err := n.podman.GetPublishedPort(ctx, n.ContainerName, fmt.Sprintf("%d/tcp", config.LibvirtTCPPort)) + if err != nil { + return fmt.Errorf("getting libvirt TCP port: %w", err) + } + n.virsh.SetLibvirtURI(fmt.Sprintf("qemu+tcp://localhost:%d/session", libvirtPort)) + if err := n.setupSSHKeys(ctx); err != nil { return fmt.Errorf("setting up SSH keys: %w", err) } diff --git a/internal/virsh/client.go b/internal/virsh/client.go index 
c80c313..1de7050 100644 --- a/internal/virsh/client.go +++ b/internal/virsh/client.go @@ -4,14 +4,18 @@ import ( "context" "fmt" "strings" + "time" "github.com/bootc-dev/bink/internal/podman" "github.com/sirupsen/logrus" + libvirt "libvirt.org/go/libvirt" ) type Client struct { containerName string podmanClient *podman.Client + libvirtURI string + conn *libvirt.Connect } func NewClient(containerName string, podmanClient *podman.Client) *Client { @@ -21,76 +25,97 @@ func NewClient(containerName string, podmanClient *podman.Client) *Client { } } -func (c *Client) ExecInContainer(ctx context.Context, args ...string) (string, error) { - return c.podmanClient.ContainerExec(ctx, c.containerName, args) +func (c *Client) SetLibvirtURI(uri string) { + c.libvirtURI = uri } -func (c *Client) VirtInstall(ctx context.Context, opts *VirtInstallOptions) error { - var memArg string - if opts.MaxMemory > 0 && opts.MaxMemory > opts.Memory { - memArg = fmt.Sprintf("memory=%d,currentMemory=%d", opts.MaxMemory, opts.Memory) - } else { - memArg = fmt.Sprintf("%d", opts.Memory) +func (c *Client) connect(ctx context.Context) error { + if c.conn != nil { + alive, err := c.conn.IsAlive() + if err == nil && alive { + return nil + } + if _, err := c.conn.Close(); err != nil { + logrus.Debugf("Closing stale libvirt connection: %v", err) + } + c.conn = nil } - args := []string{ - "virt-install", - "--connect", "qemu:///session", - "--name", opts.Name, - "--memory", memArg, - "--vcpus", fmt.Sprintf("%d", opts.VCPUs), - "--import", - "--os-variant", "fedora-unknown", - "--graphics", "none", - "--console", "pty,target_type=serial", - "--noautoconsole", + if c.libvirtURI == "" { + return fmt.Errorf("libvirt URI not set") } - // Add shared memory support if filesystems are present (required for virtiofs) - if len(opts.Filesystems) > 0 { - args = append(args, "--memorybacking", "source.type=memfd,access.mode=shared") - } + var lastErr error + backoff := 500 * time.Millisecond + deadline := 
time.Now().Add(30 * time.Second) - for _, disk := range opts.Disks { - args = append(args, "--disk", disk) - } + for time.Now().Before(deadline) { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } - for _, network := range opts.Networks { - netArg := network.Type - if network.Model != "" { - netArg += fmt.Sprintf(",model=%s", network.Model) + conn, err := libvirt.NewConnect(c.libvirtURI) + if err == nil { + c.conn = conn + logrus.Debugf("Connected to libvirt at %s", c.libvirtURI) + return nil } - if network.MAC != "" { - netArg += fmt.Sprintf(",mac=%s", network.MAC) + lastErr = err + logrus.Debugf("Retrying libvirt connection to %s: %v", c.libvirtURI, err) + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): } - if network.PortForward != "" { - netArg += fmt.Sprintf(",portForward=%s", network.PortForward) + if backoff < 5*time.Second { + backoff *= 2 } - args = append(args, "--network", netArg) } - for _, fs := range opts.Filesystems { - // Build filesystem argument for virt-install - // Explicitly specify virtiofs driver - fsArg := fmt.Sprintf("source.dir=%s,target.dir=%s,driver.type=virtiofs", - fs.Source, fs.Target) + return fmt.Errorf("connecting to libvirt at %s after 30s: %w", c.libvirtURI, lastErr) +} - if fs.ReadOnly { - fsArg += ",readonly=on" - } +func (c *Client) Close() error { + if c.conn != nil { + _, err := c.conn.Close() + c.conn = nil + return err + } + return nil +} - args = append(args, "--filesystem", fsArg) +func (c *Client) DefineAndStartDomain(ctx context.Context, opts ...DomainOption) error { + if err := c.connect(ctx); err != nil { + return fmt.Errorf("connecting to libvirt: %w", err) } - for _, xml := range opts.XMLModifications { - args = append(args, "--xml", xml) + domain := NewDomain(opts...) 
+ xmlStr, err := domain.Marshal() + if err != nil { + return fmt.Errorf("building domain XML: %w", err) } - args = append(args, "--channel", "unix,target.type=virtio,target.name=org.qemu.guest_agent.0") + logrus.Debugf("Defining domain with XML:\n%s", xmlStr) + + dom, err := c.conn.DomainDefineXML(xmlStr) + if err != nil { + return fmt.Errorf("defining domain: %w", err) + } + defer dom.Free() - logrus.Debugf("Creating VM with virt-install: %s", strings.Join(args, " ")) + if err := dom.Create(); err != nil { + return fmt.Errorf("starting domain: %w", err) + } - return c.podmanClient.ContainerExecQuiet(ctx, c.containerName, args) + logrus.Infof("Domain %s defined and started via libvirt", domain.Name) + return nil +} + +func (c *Client) ExecInContainer(ctx context.Context, args ...string) (string, error) { + return c.podmanClient.ContainerExec(ctx, c.containerName, args) } func (c *Client) QemuImgCreate(ctx context.Context, opts *QemuImgCreateOptions) error { @@ -129,4 +154,3 @@ func (c *Client) Genisoimage(ctx context.Context, outputPath, volumeID string, f return c.podmanClient.ContainerExecQuiet(ctx, c.containerName, args) } - diff --git a/internal/virsh/domain.go b/internal/virsh/domain.go new file mode 100644 index 0000000..8a78985 --- /dev/null +++ b/internal/virsh/domain.go @@ -0,0 +1,240 @@ +package virsh + +import ( + "fmt" + + "libvirt.org/go/libvirtxml" +) + +type DomainOption func(d *libvirtxml.Domain) + +func NewDomain(opts ...DomainOption) *libvirtxml.Domain { + domain := &libvirtxml.Domain{} + for _, f := range opts { + f(domain) + } + return domain +} + +func allocateDevices(d *libvirtxml.Domain) { + if d.Devices == nil { + d.Devices = &libvirtxml.DomainDeviceList{} + } +} + +func WithKVM() DomainOption { + return func(d *libvirtxml.Domain) { + d.Type = "kvm" + } +} + +func WithName(name string) DomainOption { + return func(d *libvirtxml.Domain) { + d.Name = name + } +} + +func WithMemory(memory uint) DomainOption { + return func(d *libvirtxml.Domain) 
{ + d.Memory = &libvirtxml.DomainMemory{ + Value: memory, + Unit: "MiB", + } + } +} + +func WithVCPUs(cpus uint) DomainOption { + return func(d *libvirtxml.Domain) { + d.VCPU = &libvirtxml.DomainVCPU{Value: cpus} + } +} + +func WithQ35OS() DomainOption { + return func(d *libvirtxml.Domain) { + d.OS = &libvirtxml.DomainOS{ + Type: &libvirtxml.DomainOSType{ + Arch: "x86_64", + Machine: "q35", + Type: "hvm", + }, + BootDevices: []libvirtxml.DomainBootDevice{ + {Dev: "hd"}, + }, + } + } +} + +func WithFeatures() DomainOption { + return func(d *libvirtxml.Domain) { + d.Features = &libvirtxml.DomainFeatureList{ + ACPI: &libvirtxml.DomainFeature{}, + APIC: &libvirtxml.DomainFeatureAPIC{}, + } + } +} + +func WithCPUHostPassthrough() DomainOption { + return func(d *libvirtxml.Domain) { + d.CPU = &libvirtxml.DomainCPU{ + Mode: "host-passthrough", + } + } +} + +func WithMemoryBackingForVirtiofs() DomainOption { + return func(d *libvirtxml.Domain) { + d.MemoryBacking = &libvirtxml.DomainMemoryBacking{ + MemorySource: &libvirtxml.DomainMemorySource{Type: "memfd"}, + MemoryAccess: &libvirtxml.DomainMemoryAccess{Mode: "shared"}, + } + } +} + +func WithDisk(path, format, dev, bus string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Disks = append(d.Devices.Disks, libvirtxml.DomainDisk{ + Device: "disk", + Driver: &libvirtxml.DomainDiskDriver{ + Name: "qemu", + Type: format, + }, + Source: &libvirtxml.DomainDiskSource{ + File: &libvirtxml.DomainDiskSourceFile{File: path}, + }, + Target: &libvirtxml.DomainDiskTarget{ + Dev: dev, + Bus: bus, + }, + }) + } +} + +func WithCDROM(path string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Disks = append(d.Devices.Disks, libvirtxml.DomainDisk{ + Device: "cdrom", + Driver: &libvirtxml.DomainDiskDriver{ + Name: "qemu", + Type: "raw", + }, + Source: &libvirtxml.DomainDiskSource{ + File: &libvirtxml.DomainDiskSourceFile{File: path}, + }, + Target: 
&libvirtxml.DomainDiskTarget{ + Dev: "sda", + Bus: "sata", + }, + ReadOnly: &libvirtxml.DomainDiskReadOnly{}, + }) + } +} + +type PortForward struct { + Start int + To int +} + +func WithPasstInterface(portForwards []PortForward) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + iface := libvirtxml.DomainInterface{ + Source: &libvirtxml.DomainInterfaceSource{ + User: &libvirtxml.DomainInterfaceSourceUser{}, + }, + Model: &libvirtxml.DomainInterfaceModel{Type: "virtio"}, + Backend: &libvirtxml.DomainInterfaceBackend{Type: "passt"}, + } + fwd := libvirtxml.DomainInterfaceSourcePortForward{Proto: "tcp"} + for _, pf := range portForwards { + fwd.Ranges = append(fwd.Ranges, libvirtxml.DomainInterfaceSourcePortForwardRange{ + Start: uint(pf.Start), + To: uint(pf.To), + }) + } + if len(fwd.Ranges) > 0 { + iface.PortForward = []libvirtxml.DomainInterfaceSourcePortForward{fwd} + } + d.Devices.Interfaces = append(d.Devices.Interfaces, iface) + } +} + +func WithMcastInterface(mac, addr string, port int) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Interfaces = append(d.Devices.Interfaces, libvirtxml.DomainInterface{ + MAC: &libvirtxml.DomainInterfaceMAC{Address: mac}, + Model: &libvirtxml.DomainInterfaceModel{Type: "virtio"}, + Source: &libvirtxml.DomainInterfaceSource{ + MCast: &libvirtxml.DomainInterfaceSourceMCast{ + Address: addr, + Port: uint(port), + }, + }, + }) + } +} + +func WithVirtiofsSocket(socketPath, target string) DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Filesystems = append(d.Devices.Filesystems, libvirtxml.DomainFilesystem{ + Driver: &libvirtxml.DomainFilesystemDriver{Type: "virtiofs"}, + Source: &libvirtxml.DomainFilesystemSource{ + Mount: &libvirtxml.DomainFilesystemSourceMount{Socket: socketPath}, + }, + Target: &libvirtxml.DomainFilesystemTarget{Dir: target}, + }) + } +} + +func WithSerialConsole() DomainOption { + return func(d 
*libvirtxml.Domain) { + allocateDevices(d) + port0 := uint(0) + d.Devices.Serials = append(d.Devices.Serials, libvirtxml.DomainSerial{ + Source: &libvirtxml.DomainChardevSource{ + Pty: &libvirtxml.DomainChardevSourcePty{}, + }, + Target: &libvirtxml.DomainSerialTarget{ + Type: "isa-serial", + Port: &port0, + }, + }) + d.Devices.Consoles = append(d.Devices.Consoles, libvirtxml.DomainConsole{ + Source: &libvirtxml.DomainChardevSource{ + Pty: &libvirtxml.DomainChardevSourcePty{}, + }, + Target: &libvirtxml.DomainConsoleTarget{ + Type: "serial", + Port: &port0, + }, + }) + } +} + +func WithGuestAgent() DomainOption { + return func(d *libvirtxml.Domain) { + allocateDevices(d) + d.Devices.Channels = append(d.Devices.Channels, libvirtxml.DomainChannel{ + Source: &libvirtxml.DomainChardevSource{ + UNIX: &libvirtxml.DomainChardevSourceUNIX{}, + }, + Target: &libvirtxml.DomainChannelTarget{ + VirtIO: &libvirtxml.DomainChannelTargetVirtIO{ + Name: "org.qemu.guest_agent.0", + }, + }, + }) + } +} + +func MarshalDomainXML(opts ...DomainOption) (string, error) { + domain := NewDomain(opts...) + xml, err := domain.Marshal() + if err != nil { + return "", fmt.Errorf("marshaling domain XML: %w", err) + } + return xml, nil +} diff --git a/internal/virsh/types.go b/internal/virsh/types.go index ebb2d13..b1c8511 100644 --- a/internal/virsh/types.go +++ b/internal/virsh/types.go @@ -1,30 +1,5 @@ package virsh -type VirtInstallOptions struct { - Name string - Memory int - MaxMemory int - VCPUs int - Disks []string - Networks []NetworkConfig - Filesystems []FilesystemConfig - XMLModifications []string -} - -type FilesystemConfig struct { - Source string // Host path (in container) - Target string // Mount tag name for guest - AccessMode string // mapped, passthrough, squash (default: passthrough) - ReadOnly bool -} - -type NetworkConfig struct { - Type string - Model string - MAC string - PortForward string -} - type QemuImgCreateOptions struct { Path string Format string