Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions lib/forkvm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,17 @@ instead of reusing the source identity.
- Network override fields are supplied at snapshot load to bind the fork to its
own TAP device.
- Vsock CID remains stable for snapshot-based flows.
- Fork copies always give the fork its own mem-file (reflink-cloned where the
filesystem supports FICLONE, sparse-copied otherwise), so a fork never
depends on the source snapshot or source instance after creation. Deleting
the source is safe immediately, and the source's later diff snapshots cannot
mutate memory a fork reads.
- When the Firecracker snapshot memory backend is configured as UFFD, UFFD is
used as a one-shot acceleration for the first restore of a newly forked
standby snapshot. The fork initially reuses the source snapshot memory as the
pager backing file instead of cloning the large memory file during fanout.
- That deferred memory clone is paid when the fork later enters standby. Before
Firecracker writes the fork's diff snapshot, Hypeman materializes the fork's
own `snapshot-latest/memory` file from the original backing memory. After that
point the fork has a normal on-disk snapshot base, independent from the source.
standby snapshot. The pager serves pages from the fork's own mem-file;
because forks inherit the source's snapshot cache key and their mem-files are
byte-identical clones, the pager's page cache is shared across all forks of
the same snapshot.
- Subsequent direct restores of that same fork use Firecracker's normal
file-backed memory backend. If that standby fork is itself forked again, the
new child gets its own one-shot UFFD restore.
Expand Down
13 changes: 0 additions & 13 deletions lib/forkvm/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,12 @@ type copyState struct {
reflinkDead bool
}

// CopyOptions controls which guest-directory files are copied.
type CopyOptions struct {
// SkipRelativePaths is the set of entries to leave out of the copy. The
// directory walk looks up filepath.Clean(relPath) in this map and skips the
// entry on a hit, so keys should be cleaned paths relative to the source
// directory (for example "snapshots/snapshot-latest/memory"). A nil map
// skips nothing.
SkipRelativePaths map[string]struct{}
}

// CopyGuestDirectory recursively copies a guest directory to a new destination.
// Regular files are cloned via reflink (FICLONE) when the underlying filesystem
// supports it; otherwise we fall back to a sparse extent copy
// (SEEK_DATA/SEEK_HOLE). Runtime sockets and logs are skipped because they are
// host-runtime artifacts.
func CopyGuestDirectory(srcDir, dstDir string) error {
return CopyGuestDirectoryWithOptions(srcDir, dstDir, CopyOptions{})
}

// CopyGuestDirectoryWithOptions is CopyGuestDirectory with optional path skips.
func CopyGuestDirectoryWithOptions(srcDir, dstDir string, opts CopyOptions) error {
srcInfo, err := os.Stat(srcDir)
if err != nil {
return fmt.Errorf("stat source directory: %w", err)
Expand Down Expand Up @@ -78,9 +68,6 @@ func CopyGuestDirectoryWithOptions(srcDir, dstDir string, opts CopyOptions) erro
if relPath == "." {
return nil
}
if _, ok := opts.SkipRelativePaths[filepath.Clean(relPath)]; ok {
return nil
}
if d.IsDir() && shouldSkipDirectory(relPath) {
return filepath.SkipDir
}
Expand Down
20 changes: 0 additions & 20 deletions lib/forkvm/copy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,6 @@ func TestCopyGuestDirectory(t *testing.T) {
assert.Equal(t, "metadata.json", linkTarget)
}

// TestCopyGuestDirectoryWithOptionsSkipsRelativePaths verifies that a path
// listed in CopyOptions.SkipRelativePaths is left out of the destination while
// its sibling file and an unrelated top-level file are still copied.
func TestCopyGuestDirectoryWithOptionsSkipsRelativePaths(t *testing.T) {
	src := filepath.Join(t.TempDir(), "src")
	dst := filepath.Join(t.TempDir(), "dst")

	// Paths are expressed relative to the guest directory so the same values
	// can address both the source and the destination tree.
	snapshotRel := filepath.Join("snapshots", "snapshot-latest")
	memoryRel := filepath.Join(snapshotRel, "memory")
	stateRel := filepath.Join(snapshotRel, "state")
	overlayRel := "overlay.raw"

	require.NoError(t, os.MkdirAll(filepath.Join(src, snapshotRel), 0755))
	require.NoError(t, os.WriteFile(filepath.Join(src, memoryRel), []byte("memory"), 0644))
	require.NoError(t, os.WriteFile(filepath.Join(src, stateRel), []byte("state"), 0644))
	require.NoError(t, os.WriteFile(filepath.Join(src, overlayRel), []byte("overlay"), 0644))

	opts := CopyOptions{
		SkipRelativePaths: map[string]struct{}{memoryRel: {}},
	}
	require.NoError(t, CopyGuestDirectoryWithOptions(src, dst, opts))

	assert.NoFileExists(t, filepath.Join(dst, memoryRel))
	assert.FileExists(t, filepath.Join(dst, stateRel))
	assert.FileExists(t, filepath.Join(dst, overlayRel))
}

func TestCopyRegularFile(t *testing.T) {
src := filepath.Join(t.TempDir(), "src", "memory")
dst := filepath.Join(t.TempDir(), "dst", "snapshots", "snapshot-latest", "memory")
Expand Down
8 changes: 3 additions & 5 deletions lib/guestmemory/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ func (s *stubHypervisor) Shutdown(ctx context.Context) error { return nil }
func (s *stubHypervisor) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) {
return &hypervisor.VMInfo{State: hypervisor.StateRunning}, nil
}
func (s *stubHypervisor) Pause(ctx context.Context) error { return nil }
func (s *stubHypervisor) Resume(ctx context.Context) error { return nil }
func (s *stubHypervisor) Snapshot(ctx context.Context, destPath string, _ hypervisor.SnapshotOptions) error {
return nil
}
func (s *stubHypervisor) Pause(ctx context.Context) error { return nil }
func (s *stubHypervisor) Resume(ctx context.Context) error { return nil }
func (s *stubHypervisor) Snapshot(ctx context.Context, destPath string) error { return nil }
func (s *stubHypervisor) ResizeMemory(ctx context.Context, bytes int64) error { return nil }
func (s *stubHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error {
return nil
Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/cloudhypervisor/cloudhypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func (c *CloudHypervisor) Resume(ctx context.Context) error {
}

// Snapshot creates a VM snapshot.
func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string, _ hypervisor.SnapshotOptions) error {
func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string) error {
snapshotURL := "file://" + destPath
snapshotConfig := vmm.VmSnapshotConfig{DestinationUrl: &snapshotURL}
resp, err := c.client.PutVmSnapshotWithResponse(ctx, snapshotConfig)
Expand Down
31 changes: 0 additions & 31 deletions lib/hypervisor/firecracker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,37 +101,6 @@ func TestSnapshotLoadParamsSupportsUFFDBackend(t *testing.T) {
assert.Equal(t, "/tmp/pager.sock", load.MemBackend.BackendPath)
}

// TestMaterializeDeferredSnapshotMemory verifies that the backing memory file
// is copied to "<snapshot dir>/memory" with identical contents.
func TestMaterializeDeferredSnapshotMemory(t *testing.T) {
	t.Parallel()

	want := []byte("memory")
	backingFile := filepath.Join(t.TempDir(), "source-memory")
	destDir := filepath.Join(t.TempDir(), "snapshot-latest")
	require.NoError(t, os.WriteFile(backingFile, want, 0644))

	require.NoError(t, materializeDeferredSnapshotMemory(destDir, backingFile))

	contents, readErr := os.ReadFile(filepath.Join(destDir, "memory"))
	require.NoError(t, readErr)
	assert.Equal(t, want, contents)
}

// TestMaterializeDeferredSnapshotMemoryUsesRetainedSnapshotAlternate verifies
// the fallback: when the requested snapshot-base memory file does not exist
// but the snapshot-latest one beside it does, the latter is the file copied.
func TestMaterializeDeferredSnapshotMemoryUsesRetainedSnapshotAlternate(t *testing.T) {
	t.Parallel()

	want := []byte("memory")
	snapshotsDir := filepath.Join(t.TempDir(), "snapshots")
	missingSource := filepath.Join(snapshotsDir, "snapshot-base", "memory")
	retained := filepath.Join(snapshotsDir, "snapshot-latest", "memory")
	destDir := filepath.Join(t.TempDir(), "snapshot-latest")

	// Only the alternate file is created; the requested source stays absent.
	require.NoError(t, os.MkdirAll(filepath.Dir(retained), 0755))
	require.NoError(t, os.WriteFile(retained, want, 0644))

	require.NoError(t, materializeDeferredSnapshotMemory(destDir, missingSource))

	contents, readErr := os.ReadFile(filepath.Join(destDir, "memory"))
	require.NoError(t, readErr)
	assert.Equal(t, want, contents)
}

func TestToBalloonConfig(t *testing.T) {
cfg := hypervisor.VMConfig{
GuestMemory: hypervisor.GuestMemoryConfig{
Expand Down
62 changes: 1 addition & 61 deletions lib/hypervisor/firecracker/firecracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"strings"
"time"

"github.com/kernel/hypeman/lib/forkvm"
"github.com/kernel/hypeman/lib/hypervisor"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
Expand Down Expand Up @@ -113,76 +112,17 @@ func (f *Firecracker) Resume(ctx context.Context) error {
return nil
}

func (f *Firecracker) Snapshot(ctx context.Context, destPath string, opts hypervisor.SnapshotOptions) error {
func (f *Firecracker) Snapshot(ctx context.Context, destPath string) error {
if err := os.MkdirAll(destPath, 0755); err != nil {
return fmt.Errorf("create snapshot directory: %w", err)
}
if err := materializeDeferredSnapshotMemory(destPath, opts.DeferredMemoryBackingPath); err != nil {
return err
}
params := toSnapshotCreateParams(destPath)
if _, err := f.do(ctx, http.MethodPut, "/snapshot/create", params, http.StatusNoContent); err != nil {
return fmt.Errorf("create snapshot: %w", err)
}
return nil
}

// materializeDeferredSnapshotMemory ensures destPath contains a "memory" file
// by copying it from sourcePath.
//
// It is a no-op when sourcePath is blank (after trimming whitespace) or when
// destPath/memory already exists. Otherwise the source path is resolved via
// resolveDeferredSnapshotMemorySourcePath and copied with
// forkvm.CopyRegularFile. Stat, resolve and copy failures are returned.
func materializeDeferredSnapshotMemory(destPath, sourcePath string) error {
	backing := strings.TrimSpace(sourcePath)
	if backing == "" {
		return nil
	}

	memoryPath := filepath.Join(destPath, "memory")
	_, statErr := os.Stat(memoryPath)
	if statErr == nil {
		// An existing memory file is left untouched.
		return nil
	}
	if !os.IsNotExist(statErr) {
		return fmt.Errorf("stat deferred snapshot memory target: %w", statErr)
	}

	resolved, err := resolveDeferredSnapshotMemorySourcePath(backing)
	if err != nil {
		return err
	}
	if copyErr := forkvm.CopyRegularFile(resolved, memoryPath); copyErr != nil {
		return fmt.Errorf("materialize deferred snapshot memory: %w", copyErr)
	}
	return nil
}

// resolveDeferredSnapshotMemorySourcePath picks the memory file to copy from.
//
// It returns sourcePath when that file exists. When it does not, it returns
// the path from alternateRetainedSnapshotMemoryPath if that file exists. When
// neither exists, or no alternate applies, sourcePath is returned unchanged so
// the caller surfaces the missing-file error. Any stat error other than "not
// exist" is returned wrapped.
func resolveDeferredSnapshotMemorySourcePath(sourcePath string) (string, error) {
	_, statErr := os.Stat(sourcePath)
	switch {
	case statErr == nil:
		return sourcePath, nil
	case !os.IsNotExist(statErr):
		return "", fmt.Errorf("stat deferred snapshot memory source: %w", statErr)
	}

	fallback := alternateRetainedSnapshotMemoryPath(sourcePath)
	if fallback == "" {
		return sourcePath, nil
	}

	_, statErr = os.Stat(fallback)
	switch {
	case statErr == nil:
		return fallback, nil
	case os.IsNotExist(statErr):
		return sourcePath, nil
	default:
		return "", fmt.Errorf("stat alternate deferred snapshot memory source: %w", statErr)
	}
}

// alternateRetainedSnapshotMemoryPath maps a snapshot memory path to the same
// file in the sibling snapshot directory: ".../snapshot-base/memory" becomes
// ".../snapshot-latest/memory" and vice versa. It returns "" when sourcePath
// is not a file named "memory" directly inside one of those two directories.
func alternateRetainedSnapshotMemoryPath(sourcePath string) string {
	const memoryFile = "memory"
	if filepath.Base(sourcePath) != memoryFile {
		return ""
	}

	siblings := map[string]string{
		"snapshot-base":   "snapshot-latest",
		"snapshot-latest": "snapshot-base",
	}
	parent := filepath.Dir(sourcePath)
	sibling, known := siblings[filepath.Base(parent)]
	if !known {
		return ""
	}
	return filepath.Join(filepath.Dir(parent), sibling, memoryFile)
}

func (f *Firecracker) ResizeMemory(ctx context.Context, bytes int64) error {
return hypervisor.ErrNotSupported
}
Expand Down
6 changes: 1 addition & 5 deletions lib/hypervisor/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,6 @@ type RestoreOptions struct {
SnapshotMemorySessionID string
}

// SnapshotOptions carries optional inputs to Hypervisor.Snapshot.
type SnapshotOptions struct {
// DeferredMemoryBackingPath, when non-blank, names a memory file that the
// Firecracker backend copies to "<destPath>/memory" before it creates the
// snapshot, unless that file already exists. The Cloud Hypervisor, QEMU and
// vz Snapshot implementations ignore this value.
DeferredMemoryBackingPath string
}

// ForkNetworkConfig contains network identity fields for fork preparation.
type ForkNetworkConfig struct {
TAPDevice string
Expand Down Expand Up @@ -199,7 +195,7 @@ type Hypervisor interface {

// Snapshot creates a VM snapshot at the given path.
// Check Capabilities().SupportsSnapshot before calling.
Snapshot(ctx context.Context, destPath string, opts SnapshotOptions) error
Snapshot(ctx context.Context, destPath string) error

// ResizeMemory changes the VM's memory allocation.
// Check Capabilities().SupportsHotplugMemory before calling.
Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/qemu/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (q *QEMU) Resume(ctx context.Context) error {
// Snapshot creates a VM snapshot using QEMU's migrate-to-file mechanism.
// The VM state is saved to destPath/memory file.
// The VM config is copied to destPath for restore (QEMU requires exact arg match).
func (q *QEMU) Snapshot(ctx context.Context, destPath string, _ hypervisor.SnapshotOptions) error {
func (q *QEMU) Snapshot(ctx context.Context, destPath string) error {
// QEMU uses migrate to file for snapshots
// The "file:" protocol is deprecated in QEMU 7.2+, use "exec:cat > path" instead
memoryFile := destPath + "/memory"
Expand Down
4 changes: 2 additions & 2 deletions lib/hypervisor/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,14 @@ func (h *tracingHypervisor) Resume(ctx context.Context) (err error) {
return h.next.Resume(ctx)
}

func (h *tracingHypervisor) Snapshot(ctx context.Context, destPath string, opts SnapshotOptions) (err error) {
func (h *tracingHypervisor) Snapshot(ctx context.Context, destPath string) (err error) {
ctx, span := startTraceSpan(ctx, h.tracer, "hypervisor.snapshot",
h.spanAttrs(
attribute.String("operation", "snapshot"),
)...,
)
defer func() { finishTraceSpan(span, err) }()
return h.next.Snapshot(ctx, destPath, opts)
return h.next.Snapshot(ctx, destPath)
}

func (h *tracingHypervisor) ResizeMemory(ctx context.Context, bytes int64) (err error) {
Expand Down
16 changes: 6 additions & 10 deletions lib/hypervisor/tracing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ func (fakeHypervisor) Shutdown(context.Context) error { return nil }
func (fakeHypervisor) GetVMInfo(context.Context) (*VMInfo, error) {
return &VMInfo{State: StateRunning}, nil
}
func (fakeHypervisor) Pause(context.Context) error { return nil }
func (fakeHypervisor) Resume(context.Context) error { return nil }
func (fakeHypervisor) Snapshot(context.Context, string, SnapshotOptions) error {
return nil
}
func (fakeHypervisor) Pause(context.Context) error { return nil }
func (fakeHypervisor) Resume(context.Context) error { return nil }
func (fakeHypervisor) Snapshot(context.Context, string) error { return nil }
func (fakeHypervisor) ResizeMemory(context.Context, int64) error { return nil }
func (fakeHypervisor) ResizeMemoryAndWait(context.Context, int64, time.Duration) error {
return nil
Expand All @@ -43,11 +41,9 @@ func (fakeHypervisorGetVMInfoError) Shutdown(context.Context) error { return nil
func (fakeHypervisorGetVMInfoError) GetVMInfo(context.Context) (*VMInfo, error) {
return nil, errors.New("vm info failed")
}
func (fakeHypervisorGetVMInfoError) Pause(context.Context) error { return nil }
func (fakeHypervisorGetVMInfoError) Resume(context.Context) error { return nil }
func (fakeHypervisorGetVMInfoError) Snapshot(context.Context, string, SnapshotOptions) error {
return nil
}
func (fakeHypervisorGetVMInfoError) Pause(context.Context) error { return nil }
func (fakeHypervisorGetVMInfoError) Resume(context.Context) error { return nil }
func (fakeHypervisorGetVMInfoError) Snapshot(context.Context, string) error { return nil }
func (fakeHypervisorGetVMInfoError) ResizeMemory(context.Context, int64) error {
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/vz/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ func (c *Client) rawVMState(ctx context.Context) (string, error) {
return info.State, nil
}

func (c *Client) Snapshot(ctx context.Context, destPath string, _ hypervisor.SnapshotOptions) error {
func (c *Client) Snapshot(ctx context.Context, destPath string) error {
req := snapshotRequest{DestinationPath: destPath}
body, err := json.Marshal(req)
if err != nil {
Expand Down
Loading
Loading