diff --git a/cmd/limactl/network.go b/cmd/limactl/network.go index 492237c4d92..dbd7aa64109 100644 --- a/cmd/limactl/network.go +++ b/cmd/limactl/network.go @@ -10,6 +10,7 @@ import ( "maps" "net" "os" + "path/filepath" "slices" "strings" "text/tabwriter" @@ -18,12 +19,17 @@ import ( "github.com/spf13/cobra" "github.com/lima-vm/lima/v2/pkg/networks" + "github.com/lima-vm/lima/v2/pkg/networks/usernet" + "github.com/lima-vm/lima/v2/pkg/networks/usernet/filter" "github.com/lima-vm/lima/v2/pkg/yqutil" ) const networkCreateExample = ` Create a network: $ limactl network create foo --gateway 192.168.42.1/24 + Create a network with policy filtering: + $ limactl network create secure --gateway 192.168.42.1/24 --policy ~/policy.yaml + Connect VM instances to the newly created network: $ limactl create --network lima:foo --name vm1 $ limactl create --network lima:foo --name vm2 @@ -144,6 +150,7 @@ func newNetworkCreateCommand() *cobra.Command { flags.String("gateway", "", "gateway, e.g., \"192.168.42.1/24\"") flags.String("interface", "", "interface for bridged mode") _ = cmd.RegisterFlagCompletionFunc("interface", bashFlagCompleteNetworkInterfaceNames) + flags.String("policy", "", "path to policy file (YAML or JSON, user-v2 mode only)") return cmd } @@ -174,6 +181,38 @@ func networkCreateAction(cmd *cobra.Command, args []string) error { return err } + policyPath, err := flags.GetString("policy") + if err != nil { + return err + } + + // Handle policy file if provided + if policyPath != "" { + // Only user-v2 mode supports filtering + if mode != networks.ModeUserV2 { + logrus.Warnf("Policy filtering is only supported for mode 'user-v2', ignoring --policy flag") + } else { + // Load the policy to validate it + pol, err := filter.LoadPolicy(policyPath) + if err != nil { + return fmt.Errorf("failed to load policy: %w", err) + } + + // Save as JSON in the network directory (~/.lima/_networks//policy.json) + policyJSONPath, err := usernet.PolicyFile(name) + if err != nil { + 
return fmt.Errorf("failed to get policy path: %w", err) + } + // Ensure network directory exists (follows usernet convention) + if err := os.MkdirAll(filepath.Dir(policyJSONPath), 0o755); err != nil { + return fmt.Errorf("failed to create network directory: %w", err) + } + if err := filter.SavePolicyJSON(pol, policyJSONPath); err != nil { + return fmt.Errorf("failed to save policy: %w", err) + } + } + } + switch mode { case networks.ModeBridged: if gateway != "" { diff --git a/cmd/limactl/usernet.go b/cmd/limactl/usernet.go index 9c11ce39a89..8141244e611 100644 --- a/cmd/limactl/usernet.go +++ b/cmd/limactl/usernet.go @@ -11,9 +11,11 @@ import ( "strconv" "syscall" + "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/lima-vm/lima/v2/pkg/networks/usernet" + "github.com/lima-vm/lima/v2/pkg/networks/usernet/filter" ) func newUsernetCommand() *cobra.Command { @@ -31,6 +33,7 @@ func newUsernetCommand() *cobra.Command { hostagentCommand.Flags().String("subnet", "192.168.5.0/24", "Sets subnet value for the usernet network") hostagentCommand.Flags().Int("mtu", 1500, "mtu") hostagentCommand.Flags().StringToString("leases", nil, "Pass default static leases for startup. 
Eg: '192.168.104.1=52:55:55:b3:bc:d9,192.168.104.2=5a:94:ef:e4:0c:df' ") + hostagentCommand.Flags().String("policy", "", "Path to policy JSON file") return hostagentCommand } @@ -75,6 +78,22 @@ func usernetAction(cmd *cobra.Command, _ []string) error { return err } + policyPath, err := cmd.Flags().GetString("policy") + if err != nil { + return err + } + + // Parse the policy at the CLI boundary (fail fast on invalid policy) + var policy *filter.Policy + if policyPath != "" { + logrus.Debugf("Loading policy from: %s", policyPath) + policy, err = filter.LoadPolicy(policyPath) + if err != nil { + return fmt.Errorf("failed to load policy: %w", err) + } + logrus.Debugf("Loaded policy with %d rules", len(policy.Rules)) + } + os.RemoveAll(endpoint) os.RemoveAll(qemuSocket) os.RemoveAll(fdSocket) @@ -92,5 +111,6 @@ func usernetAction(cmd *cobra.Command, _ []string) error { FdSocket: fdSocket, Subnet: subnet, DefaultLeases: leases, + Policy: policy, }) } diff --git a/pkg/networks/usernet/config.go b/pkg/networks/usernet/config.go index 359044947dd..8ad072b1f0f 100644 --- a/pkg/networks/usernet/config.go +++ b/pkg/networks/usernet/config.go @@ -112,6 +112,16 @@ func Leases(name string) (string, error) { return sockPath, nil } +// PolicyFile returns the path to the policy JSON file for the given network name. +// For usernet, this is stored in ~/.lima/_networks//policy.json (not VarRun). 
+func PolicyFile(name string) (string, error) { + dir, err := dirnames.LimaNetworksDir() + if err != nil { + return "", err + } + return filepath.Join(dir, name, "policy.json"), nil +} + func netmaskToCidr(baseIP, netMask net.IP) (net.IP, *net.IPNet, error) { size, _ := net.IPMask(netMask.To4()).Size() return net.ParseCIDR(fmt.Sprintf("%s/%d", baseIP.String(), size)) diff --git a/pkg/networks/usernet/filter/dns.go b/pkg/networks/usernet/filter/dns.go new file mode 100644 index 00000000000..1432715ceaf --- /dev/null +++ b/pkg/networks/usernet/filter/dns.go @@ -0,0 +1,268 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "net" + "strings" + "sync" + "time" +) + +const ( + // MaxDNSRecords is the maximum number of DNS records to track. + // This prevents unbounded memory growth in long-running processes. + MaxDNSRecords = 10000 +) + +// DNSRecord represents a DNS query result with TTL. +type DNSRecord struct { + Domain string + IPs []net.IP + ExpireAt time.Time +} + +// Tracker tracks domain to IP mappings from DNS queries. +type Tracker struct { + mu sync.RWMutex + records map[string]*DNSRecord // domain -> record +} + +// NewTracker creates a new DNS tracker. 
+func NewTracker() *Tracker { + return &Tracker{ + records: make(map[string]*DNSRecord), + } +} + +// SeedLimaInternalDomains pre-populates the tracker with Lima internal domains +// These are special domains that map to Lima network infrastructure: +// - subnet.lima.internal -> the entire Lima subnet (e.g., 192.168.100.0/24) +// - host.lima.internal -> the Lima gateway (e.g., 192.168.100.2) +func (t *Tracker) SeedLimaInternalDomains(subnet, gatewayIP string) error { + if subnet == "" { + return nil + } + + _, subnetNet, err := net.ParseCIDR(subnet) + if err != nil { + return err + } + + // Get all IPs in the subnet for subnet.lima.internal + var subnetIPs []net.IP + // For now, just add the network address + // We could enumerate all IPs but that's expensive for large subnets + subnetIPs = append(subnetIPs, subnetNet.IP) + + // Add subnet.lima.internal -> subnet IPs (never expires) + t.addPreSeededRecord("subnet.lima.internal", subnetIPs) + + // Add host.lima.internal -> Lima gateway (never expires) + // This must be seeded because gvisor's internal DNS server resolves *.lima.internal + // domains internally, so the DNS snooper never sees the responses + if gatewayIP != "" { + gateway := net.ParseIP(gatewayIP) + if gateway != nil { + t.addPreSeededRecord("host.lima.internal", []net.IP{gateway}) + } + } + + return nil +} + +// addPreSeededRecord adds a pre-seeded record that never expires (ExpireAt = zero time). +func (t *Tracker) addPreSeededRecord(domain string, ips []net.IP) { + t.mu.Lock() + defer t.mu.Unlock() + + domain = strings.ToLower(domain) + + t.records[domain] = &DNSRecord{ + Domain: domain, + IPs: ips, + ExpireAt: time.Time{}, // zero = never expires + } +} + +// IsPreSeeded returns true if the domain was pre-seeded (ExpireAt.IsZero()). 
+func (t *Tracker) IsPreSeeded(domain string) bool { + t.mu.RLock() + defer t.mu.RUnlock() + + record, ok := t.records[strings.ToLower(domain)] + return ok && record.ExpireAt.IsZero() +} + +// AddRecord adds or updates a DNS record (from observed DNS responses). +func (t *Tracker) AddRecord(domain string, ips []net.IP, ttl time.Duration) { + t.mu.Lock() + defer t.mu.Unlock() + + domain = strings.ToLower(domain) + + // If at capacity and this is a new domain, clean up expired entries first + if _, exists := t.records[domain]; !exists && len(t.records) >= MaxDNSRecords { + t.cleanExpiredLocked() + + // If still at capacity after cleanup, remove oldest entry + if len(t.records) >= MaxDNSRecords { + t.removeOldestLocked() + } + } + + t.records[domain] = &DNSRecord{ + Domain: domain, + IPs: ips, + ExpireAt: time.Now().Add(ttl), + } +} + +// GetIPs returns all IPs for a domain, or nil if not found/expired. +func (t *Tracker) GetIPs(domain string) []net.IP { + t.mu.RLock() + defer t.mu.RUnlock() + + domain = strings.ToLower(domain) + record, ok := t.records[domain] + if !ok { + return nil + } + // Skip expired records + if !record.ExpireAt.IsZero() && time.Now().After(record.ExpireAt) { + return nil + } + return record.IPs +} + +// GetIPsForPattern returns all IPs matching a domain pattern (supports wildcards). +// Example: "*.example.com" matches "api.example.com", "cdn.example.com". 
+func (t *Tracker) GetIPsForPattern(pattern string) []net.IP { + t.mu.RLock() + defer t.mu.RUnlock() + + pattern = strings.ToLower(pattern) + var allIPs []net.IP + seenIPs := make(map[string]bool) + now := time.Now() + + for domain, record := range t.records { + // Skip expired records + if !record.ExpireAt.IsZero() && now.After(record.ExpireAt) { + continue + } + + // Check if domain matches pattern + if matchesPattern(domain, pattern) { + for _, ip := range record.IPs { + ipStr := ip.String() + if !seenIPs[ipStr] { + seenIPs[ipStr] = true + allIPs = append(allIPs, ip) + } + } + } + } + + return allIPs +} + +// GetDomainsForIP returns all domains that resolve to the given IP (reverse lookup). +func (t *Tracker) GetDomainsForIP(ip net.IP) []string { + t.mu.RLock() + defer t.mu.RUnlock() + + var domains []string + now := time.Now() + + for domain, record := range t.records { + // Skip expired records + if !record.ExpireAt.IsZero() && now.After(record.ExpireAt) { + continue + } + + // Check if this domain resolves to the given IP + for _, recordIP := range record.IPs { + if recordIP.Equal(ip) { + domains = append(domains, domain) + break + } + } + } + + return domains +} + +// CleanExpired removes expired DNS records. +func (t *Tracker) CleanExpired() { + t.mu.Lock() + defer t.mu.Unlock() + t.cleanExpiredLocked() +} + +// cleanExpiredLocked removes expired DNS records (must hold lock). +func (t *Tracker) cleanExpiredLocked() { + now := time.Now() + for domain, record := range t.records { + // Skip expired records + if !record.ExpireAt.IsZero() && now.After(record.ExpireAt) { + delete(t.records, domain) + } + } +} + +// removeOldestLocked removes the record with the earliest expiration time (must hold lock). 
+func (t *Tracker) removeOldestLocked() { + if len(t.records) == 0 { + return + } + + var oldestDomain string + var oldestExpireAt time.Time + first := true + + for domain, record := range t.records { + // Skip pre-seeded records + if record.ExpireAt.IsZero() { + continue + } + if first || record.ExpireAt.Before(oldestExpireAt) { + oldestDomain = domain + oldestExpireAt = record.ExpireAt + first = false + } + } + + if oldestDomain != "" { + delete(t.records, oldestDomain) + } +} + +// matchesPattern checks if a domain matches a pattern with wildcard support. +// Pattern examples: +// - "example.com" matches exactly "example.com" +// - "*.example.com" matches "api.example.com", "cdn.example.com", but NOT "example.com" +// - "*" matches everything +func matchesPattern(domain, pattern string) bool { + // Exact match + if domain == pattern { + return true + } + + // Match all + if pattern == "*" { + return true + } + + // Wildcard pattern + if strings.HasPrefix(pattern, "*.") { + suffix := pattern[2:] // Remove "*." + // Domain must end with the suffix and have at least one more label + if strings.HasSuffix(domain, "."+suffix) { + return true + } + } + + return false +} diff --git a/pkg/networks/usernet/filter/dnssnooper.go b/pkg/networks/usernet/filter/dnssnooper.go new file mode 100644 index 00000000000..41ce5ab6975 --- /dev/null +++ b/pkg/networks/usernet/filter/dnssnooper.go @@ -0,0 +1,191 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "encoding/binary" + "net" + "time" + + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +// dnsSnooper is a matcher that intercepts DNS response packets and tracks domain-to-IP mappings +// It does not block DNS queries - filtering happens at connection time based on tracked FQDNs. +// The parsing code is just following RFC 1035 vs. using a library. This is less than 100 lines +// of fairly simple RFC following code so on balance worth it. 
type dnsSnooper struct {
	// tracker receives every domain -> IP mapping extracted by Match.
	tracker *Tracker
}

// Name returns the fixed identifier "dnsSnooper" for this matcher.
func (*dnsSnooper) Name() string {
	return "dnsSnooper"
}

// Match inspects a packet and, when it looks like a DNS response carrying
// answers, records the queried domain and the returned addresses in the
// tracker. It returns (false, false) on every path, so the snooper itself
// never matches a rule and never requests a drop.
//
// The hook and the two string arguments are ignored.
//
// NOTE(review): the only checks visible here are a transport header of at
// least 8 bytes and a source port of 53; the protocol number is not compared.
// Presumably a TCP segment from port 53 would also reach the parser - confirm
// whether the rule this matcher is attached to already restricts it to UDP.
func (d *dnsSnooper) Match(_ stack.Hook, pkt *stack.PacketBuffer, _, _ string) (matches, hotdrop bool) {
	// Only inspect UDP packets
	// (8 bytes is the size of a UDP header; the source port is its first field).
	transportHeader := pkt.TransportHeader().Slice()
	if len(transportHeader) < 8 {
		return false, false
	}

	// Check if this is a UDP packet from port 53 (DNS response)
	srcPort := binary.BigEndian.Uint16(transportHeader[0:2])
	if srcPort != 53 {
		return false, false
	}

	// Get the UDP payload (DNS message)
	// A DNS message starts with a fixed 12-byte header.
	data := pkt.Data().AsRange().ToSlice()
	if len(data) < 12 {
		return false, false // Too short to be a DNS message
	}

	// Parse DNS header
	// Byte 2-3: Flags - check if this is a response (QR bit = 1)
	flags := binary.BigEndian.Uint16(data[2:4])
	isResponse := (flags & 0x8000) != 0
	if !isResponse {
		return false, false
	}

	// Get question count and answer count
	qdCount := binary.BigEndian.Uint16(data[4:6])
	anCount := binary.BigEndian.Uint16(data[6:8])

	if anCount == 0 {
		return false, false // No answers
	}

	// Parse DNS message
	domain, ips, ttl := parseDNSResponse(data, qdCount, anCount)
	if domain != "" && len(ips) > 0 {
		// Track DNS response for future FQDN-based connection filtering
		// We don't block DNS queries themselves - only actual connections to unauthorized IPs
		// Use minimum TTL of 60 seconds to handle cases where DNS TTL is 0 or very short
		trackTTL := ttl
		if trackTTL < 60 {
			trackTTL = 60
		}
		d.tracker.AddRecord(domain, ips, time.Duration(trackTTL)*time.Second)
	}

	// Always allow DNS packets - filtering happens at connection time
	return false, false
}

// parseDNSResponse extracts domain, IPs, and TTL from a DNS response packet.
+func parseDNSResponse(data []byte, qdCount, anCount uint16) (domain string, ips []net.IP, ttl uint32) { + offset := 12 // Start after DNS header + + // Skip questions to get to answers + for i := uint16(0); i < qdCount && offset < len(data); i++ { + // Parse the domain name from the question + if domain == "" { + domain, offset = parseDNSName(data, offset) + } else { + _, offset = parseDNSName(data, offset) + } + if offset+4 > len(data) { + return "", nil, 0 + } + offset += 4 // Skip QTYPE and QCLASS + } + + // Parse answers + var minTTL uint32 + firstTTL := true + + for i := uint16(0); i < anCount && offset < len(data); i++ { + // Parse name (might be compressed) + _, offset = parseDNSName(data, offset) + if offset+10 > len(data) { + break + } + + rrType := binary.BigEndian.Uint16(data[offset : offset+2]) + offset += 2 // TYPE + offset += 2 // CLASS + + ttl := binary.BigEndian.Uint32(data[offset : offset+4]) + if firstTTL || ttl < minTTL { + minTTL = ttl + firstTTL = false + } + offset += 4 // TTL + + rdLength := binary.BigEndian.Uint16(data[offset : offset+2]) + offset += 2 // RDLENGTH + + if offset+int(rdLength) > len(data) { + break + } + + // Extract IP addresses from A (1) and AAAA (28) records + if rrType == 1 && rdLength == 4 { + // A record (IPv4) + ip := net.IP(data[offset : offset+4]) + ips = append(ips, ip) + } else if rrType == 28 && rdLength == 16 { + // AAAA record (IPv6) + ip := net.IP(data[offset : offset+16]) + ips = append(ips, ip) + } + + offset += int(rdLength) + } + + return domain, ips, minTTL +} + +// parseDNSName parses a DNS name and returns the name and new offset. 
+func parseDNSName(data []byte, offset int) (name string, newOffset int) { + jumped := false + jumpOffset := 0 + maxJumps := 5 + jumps := 0 + + for offset < len(data) { + length := int(data[offset]) + + // Check for compression (pointer) + if length&0xC0 == 0xC0 { + if offset+1 >= len(data) { + break + } + pointer := int(binary.BigEndian.Uint16(data[offset:offset+2]) & 0x3FFF) + if !jumped { + jumpOffset = offset + 2 + } + offset = pointer + jumped = true + jumps++ + if jumps > maxJumps { + break + } + continue + } + + if length == 0 { + offset++ + break + } + + offset++ + if offset+length > len(data) { + break + } + + if name != "" { + name += "." + } + name += string(data[offset : offset+length]) + offset += length + } + + if jumped { + return name, jumpOffset + } + return name, offset +} diff --git a/pkg/networks/usernet/filter/dnssnooper_test.go b/pkg/networks/usernet/filter/dnssnooper_test.go new file mode 100644 index 00000000000..edd15604669 --- /dev/null +++ b/pkg/networks/usernet/filter/dnssnooper_test.go @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "encoding/binary" + "net" + "testing" + "time" + + "gotest.tools/v3/assert" +) + +func TestParseDNSResponse(t *testing.T) { + response := buildDNSResponse("example.com", net.ParseIP("93.184.216.34"), 300) + domain, ips, ttl := parseDNSResponse(response, 1, 1) + + assert.Equal(t, domain, "example.com") + assert.Equal(t, len(ips), 1) + assert.Equal(t, ips[0].String(), "93.184.216.34") + assert.Equal(t, ttl, uint32(300)) +} + +func TestParseDNSResponse_IPv6(t *testing.T) { + response := buildDNSResponse("ipv6.example.com", net.ParseIP("2001:db8::1"), 600) + domain, ips, ttl := parseDNSResponse(response, 1, 1) + + assert.Equal(t, domain, "ipv6.example.com") + assert.Equal(t, len(ips), 1) + assert.Equal(t, ips[0].String(), "2001:db8::1") + assert.Equal(t, ttl, uint32(600)) +} + +func TestParseDNSResponse_MultipleIPs(t 
*testing.T) { + response := buildDNSResponseMulti("multi.example.com", []net.IP{ + net.ParseIP("192.0.2.1"), + net.ParseIP("192.0.2.2"), + net.ParseIP("192.0.2.3"), + }, 120) + + domain, ips, ttl := parseDNSResponse(response, 1, 3) + + assert.Equal(t, domain, "multi.example.com") + assert.Equal(t, len(ips), 3) + + expectedIPs := []string{"192.0.2.1", "192.0.2.2", "192.0.2.3"} + for i, ip := range ips { + assert.Equal(t, ip.String(), expectedIPs[i]) + } + + assert.Equal(t, ttl, uint32(120)) +} + +func TestDNSSnooperIntegration(t *testing.T) { + tracker := NewTracker() + pol := &Policy{ + Version: "1.0", + Rules: []PolicyRule{ + {Name: "allow-all", Action: "allow", Priority: 100}, + }, + } + + table, err := BuildFilterTable(pol, tracker, "192.168.127.0/24", "192.168.127.1", false) + assert.NilError(t, err) + assert.Assert(t, len(table.Rules) >= 2, "Expected at least 2 rules (snooper + policy)") + + response := buildDNSResponse("github.com", net.ParseIP("140.82.121.4"), 60) + domain, ips, ttl := parseDNSResponse(response, 1, 1) + + assert.Assert(t, domain != "" && len(ips) > 0, "Failed to parse DNS response") + + tracker.AddRecord(domain, ips, time.Duration(ttl)*time.Second) + + trackedIPs := tracker.GetIPs("github.com") + assert.Equal(t, len(trackedIPs), 1) + assert.Equal(t, trackedIPs[0].String(), "140.82.121.4") +} + +// Helper functions to build DNS response packets + +func buildDNSResponse(domain string, ip net.IP, ttl uint32) []byte { + return buildDNSResponseMulti(domain, []net.IP{ip}, ttl) +} + +func buildDNSResponseMulti(domain string, ips []net.IP, ttl uint32) []byte { + buf := make([]byte, 0, 512) + + // DNS Header (12 bytes) + buf = append(buf, 0x00, 0x01) // Transaction ID + buf = append(buf, 0x81, 0x80) // Flags: response, no error + buf = append(buf, 0x00, 0x01) // Questions: 1 + anCount := uint16(len(ips)) + buf = binary.BigEndian.AppendUint16(buf, anCount) // Answers + buf = append(buf, 0x00, 0x00) // Authority RRs: 0 + buf = append(buf, 0x00, 0x00) 
// Additional RRs: 0 + + // Question section + buf = appendDNSName(buf, domain) + if ips[0].To4() != nil { + buf = append(buf, 0x00, 0x01) // Type A + } else { + buf = append(buf, 0x00, 0x1c) // Type AAAA + } + buf = append(buf, 0x00, 0x01) // Class IN + + // Answer section(s) + for _, ip := range ips { + buf = appendDNSName(buf, domain) + if ip.To4() != nil { + buf = append(buf, 0x00, 0x01) // Type A + buf = append(buf, 0x00, 0x01) // Class IN + buf = binary.BigEndian.AppendUint32(buf, ttl) + buf = append(buf, 0x00, 0x04) // RDLENGTH: 4 + buf = append(buf, ip.To4()...) + } else { + buf = append(buf, 0x00, 0x1c) // Type AAAA + buf = append(buf, 0x00, 0x01) // Class IN + buf = binary.BigEndian.AppendUint32(buf, ttl) + buf = append(buf, 0x00, 0x10) // RDLENGTH: 16 + buf = append(buf, ip.To16()...) + } + } + + return buf +} + +func appendDNSName(buf []byte, domain string) []byte { + labels := splitDomain(domain) + for _, label := range labels { + buf = append(buf, byte(len(label))) + buf = append(buf, []byte(label)...) + } + buf = append(buf, 0x00) // Null terminator + return buf +} + +func splitDomain(domain string) []string { + var labels []string + current := "" + for _, ch := range domain { + if ch == '.' 
{ + if current != "" { + labels = append(labels, current) + current = "" + } + } else { + current += string(ch) + } + } + if current != "" { + labels = append(labels, current) + } + return labels +} diff --git a/pkg/networks/usernet/filter/filter.go b/pkg/networks/usernet/filter/filter.go new file mode 100644 index 00000000000..785d8576806 --- /dev/null +++ b/pkg/networks/usernet/filter/filter.go @@ -0,0 +1,171 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "errors" + "fmt" + "net" + "reflect" + "sync" + "time" + "unsafe" + + "github.com/containers/gvisor-tap-vsock/pkg/types" + "github.com/containers/gvisor-tap-vsock/pkg/virtualnetwork" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" +) + +// FilteredVirtualNetwork wraps a virtualnetwork.VirtualNetwork with policy filtering. +type FilteredVirtualNetwork struct { + vn *virtualnetwork.VirtualNetwork + dnsTracker *Tracker + policy *Policy + stack *stack.Stack +} + +// Filter wraps an existing virtual network with policy filtering using a pre-parsed policy. +// This allows the virtual network to be created first, then optionally wrapped with filtering. +// The config parameter should be the same Configuration used to create the VirtualNetwork. 
+func Filter(vn *virtualnetwork.VirtualNetwork, config *types.Configuration, pol *Policy) (*FilteredVirtualNetwork, error) { + if pol == nil { + return nil, errors.New("policy cannot be nil") + } + + if config == nil { + return nil, errors.New("config cannot be nil") + } + + // Get the network stack from the virtual network using reflection + st, err := getStackFromVirtualNetwork(vn) + if err != nil { + return nil, fmt.Errorf("failed to get stack from virtual network: %w", err) + } + + // Create DNS tracker for domain resolution + dnsTracker := NewTracker() + + // Seed tracker with Lima internal domains (host.lima.internal, subnet.lima.internal) + if err := dnsTracker.SeedLimaInternalDomains(config.Subnet, config.GatewayIP); err != nil { + return nil, fmt.Errorf("failed to seed Lima internal domains: %w", err) + } + + // Apply policy filtering to the stack (iptables for DNS filtering) + if err := ApplyPolicy(st, pol, dnsTracker, config.Subnet, config.GatewayIP); err != nil { + return nil, fmt.Errorf("failed to apply policy: %w", err) + } + + // Install filtered forwarders for TCP/UDP traffic + // This provides pre-NAT filtering at the transport layer + if err := installFilteredForwarders(st, config, pol, dnsTracker); err != nil { + return nil, fmt.Errorf("failed to install filtered forwarders: %w", err) + } + + // NOTE: ICMP filtering by destination IP is not currently supported due to gvisor architecture. + // Unlike TCP/UDP which use forwarders that see pre-NAT destinations, ICMP packets are NAT'd + // before reaching any hook point where we can filter them. The policy can only allow/deny + // ALL ICMP traffic via the "allow-icmp" rule, not filter by specific destinations. 
+ + fvn := &FilteredVirtualNetwork{ + vn: vn, + dnsTracker: dnsTracker, + policy: pol, + stack: st, + } + + // Start a background goroutine to clean up expired DNS entries + go fvn.cleanupExpiredDNS() + + return fvn, nil +} + +// installFilteredForwarders replaces the default TCP/UDP forwarders with filtered versions +// This allows us to see and filter on actual destination IPs before NAT. +func installFilteredForwarders(st *stack.Stack, config *types.Configuration, pol *Policy, tracker *Tracker) error { + // Parse NAT table + var natLock sync.Mutex + nat := parseNATTable(config) + + // Install filtered TCP forwarder + tcpForwarder := FilteredTCPForwarder(st, nat, &natLock, config.Ec2MetadataAccess, pol, tracker, config.Subnet, config.GatewayIP) + st.SetTransportProtocolHandler(tcp.ProtocolNumber, tcpForwarder.HandlePacket) + + // Install filtered UDP forwarder + udpForwarder := FilteredUDPForwarder(st, nat, &natLock, pol, tracker, config.Subnet, config.GatewayIP) + st.SetTransportProtocolHandler(udp.ProtocolNumber, udpForwarder.HandlePacket) + + return nil +} + +func parseNATTable(configuration *types.Configuration) map[tcpip.Address]tcpip.Address { + translation := make(map[tcpip.Address]tcpip.Address) + for source, destination := range configuration.NAT { + translation[tcpip.AddrFrom4Slice(net.ParseIP(source).To4())] = tcpip.AddrFrom4Slice(net.ParseIP(destination).To4()) + } + return translation +} + +// ApplyPolicy applies the policy to a network stack. 
+func ApplyPolicy(st *stack.Stack, pol *Policy, dnsTracker *Tracker, localSubnet, gatewayIP string) error { + ipt := st.IPTables() + + // Build and apply IPv4 filter table + ipv4Table, err := BuildFilterTable(pol, dnsTracker, localSubnet, gatewayIP, false) + if err != nil { + return fmt.Errorf("failed to build IPv4 filter table: %w", err) + } + ipt.ForceReplaceTable(stack.FilterID, ipv4Table, false) + + // Build and apply IPv6 filter table + ipv6Table, err := BuildFilterTable(pol, dnsTracker, localSubnet, gatewayIP, true) + if err != nil { + return fmt.Errorf("failed to build IPv6 filter table: %w", err) + } + ipt.ForceReplaceTable(stack.FilterID, ipv6Table, true) + + return nil +} + +// cleanupExpiredDNS periodically cleans up expired DNS records. +func (fvn *FilteredVirtualNetwork) cleanupExpiredDNS() { + ticker := time.NewTicker(5 * time.Minute) + defer ticker.Stop() + + for range ticker.C { + fvn.dnsTracker.CleanExpired() + } +} + +// VirtualNetwork returns the underlying virtual network +// This allows the filtered network to be used anywhere a *virtualnetwork.VirtualNetwork is expected. +func (fvn *FilteredVirtualNetwork) VirtualNetwork() *virtualnetwork.VirtualNetwork { + return fvn.vn +} + +// getStackFromVirtualNetwork uses reflection to access the unexported stack field. 
+func getStackFromVirtualNetwork(vn *virtualnetwork.VirtualNetwork) (*stack.Stack, error) { + vnValue := reflect.ValueOf(vn).Elem() + stackField := vnValue.FieldByName("stack") + + if !stackField.IsValid() { + return nil, errors.New("stack field not found in VirtualNetwork") + } + + // Make the field accessible using unsafe + stackField = reflect.NewAt(stackField.Type(), unsafe.Pointer(stackField.UnsafeAddr())).Elem() + + st, ok := stackField.Interface().(*stack.Stack) + if !ok { + return nil, errors.New("stack field is not of type *stack.Stack") + } + + if st == nil { + return nil, errors.New("stack is nil") + } + + return st, nil +} diff --git a/pkg/networks/usernet/filter/filter_test.go b/pkg/networks/usernet/filter/filter_test.go new file mode 100644 index 00000000000..cc135840e80 --- /dev/null +++ b/pkg/networks/usernet/filter/filter_test.go @@ -0,0 +1,452 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "fmt" + "net" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/containers/gvisor-tap-vsock/pkg/types" + "github.com/containers/gvisor-tap-vsock/pkg/virtualnetwork" + "gotest.tools/v3/assert" +) + +// Test helpers + +func newTestConfig() *types.Configuration { + return &types.Configuration{ + Debug: false, + MTU: 1500, + Subnet: "192.168.127.0/24", + GatewayIP: "192.168.127.1", + GatewayMacAddress: "5a:94:ef:e4:0c:dd", + } +} + +func newTestVirtualNetwork(t *testing.T, config *types.Configuration) *virtualnetwork.VirtualNetwork { + t.Helper() + vn, err := virtualnetwork.New(config) + assert.NilError(t, err) + return vn +} + +func newFilteredNetwork(t *testing.T, config *types.Configuration, pol *Policy) *FilteredVirtualNetwork { + t.Helper() + vn := newTestVirtualNetwork(t, config) + fvn, err := Filter(vn, config, pol) + assert.NilError(t, err) + return fvn +} + +func loadPolicyFromString(t *testing.T, policyYAML string) *Policy { + t.Helper() + tmpFile, 
err := os.CreateTemp(t.TempDir(), "policy-*.yaml") + assert.NilError(t, err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString(policyYAML) + assert.NilError(t, err) + tmpFile.Close() + + pol, err := LoadPolicy(tmpFile.Name()) + assert.NilError(t, err) + return pol +} + +func TestNewWithPolicy_Integration(t *testing.T) { + pol := loadPolicyFromString(t, `version: "1.0" +rules: + - name: allow-https + action: allow + priority: 10 + egress: + protocols: [tcp] + ports: ["443"] + - name: allow-dns + action: allow + priority: 20 + egress: + protocols: [udp] + ports: ["53"] + - name: block-metadata + action: deny + priority: 5 + egress: + ips: [169.254.169.254/32] + - name: deny-all + action: deny + priority: 1000`) + + config := newTestConfig() + fvn := newFilteredNetwork(t, config, pol) + + assert.Assert(t, fvn != nil) + assert.Assert(t, fvn.VirtualNetwork() != nil) + assert.Assert(t, fvn.stack != nil) + + table := fvn.stack.IPTables().GetTable(0, false) + assert.Assert(t, len(table.Rules) > 0, "Expected rules in filter table") +} + +func TestNewWithPolicy_InvalidPolicy(t *testing.T) { + tmpFile, err := os.CreateTemp(t.TempDir(), "policy-*.yaml") + assert.NilError(t, err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString(`version: "1.0" +rules: + - name: test + action: invalid-action + priority: 1`) + assert.NilError(t, err) + tmpFile.Close() + + _, err = LoadPolicy(tmpFile.Name()) + assert.Assert(t, err != nil, "Expected error for invalid policy") +} + +func TestPolicyValidation(t *testing.T) { + tests := []struct { + name string + policy string + wantErrMsg string + }{ + {"invalid version", `version: "2.0" +rules: [{name: test, action: allow, priority: 1}]`, "unsupported policy version"}, + {"invalid port range", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, egress: {ports: ["99999"]}}]`, "port 99999 out of range"}, + {"invalid port range format", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, 
egress: {ports: ["8000-7000"]}}]`, "start port 8000 greater than end port 7000"}, + {"invalid IP", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, egress: {ips: ["not-an-ip"]}}]`, "not a valid IP address or CIDR notation"}, + {"invalid CIDR", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, egress: {ips: ["192.168.1.1/33"]}}]`, "not a valid IP address or CIDR notation"}, + {"valid ICMP rule", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, egress: {protocols: [icmp]}}]`, ""}, + {"valid domain-based allow", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, egress: {domains: ["github.com"], ports: ["443"]}}]`, ""}, + {"valid policy", `version: "1.0" +rules: [{name: test, action: allow, priority: 1, egress: {protocols: [tcp], ports: ["443", "8000-9000"], ips: ["192.168.1.0/24", "10.0.0.1"]}}]`, ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpFile, err := os.CreateTemp(t.TempDir(), "policy-*.yaml") + assert.NilError(t, err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString(tt.policy) + assert.NilError(t, err) + tmpFile.Close() + + _, err = LoadPolicy(tmpFile.Name()) + if tt.wantErrMsg != "" { + assert.Assert(t, err != nil, "Expected error but got none") + assert.Assert(t, strings.Contains(err.Error(), tt.wantErrMsg), + "Expected error to contain '%s', got: %v", tt.wantErrMsg, err) + } else { + assert.NilError(t, err) + } + }) + } +} + +func TestNewWithPolicy_MissingPolicyFile(t *testing.T) { + _, err := LoadPolicy("/nonexistent/policy.yaml") + assert.Assert(t, err != nil, "Expected error for missing policy file") +} + +func TestExamplePolicy(t *testing.T) { + policyPath := filepath.Join(".", "policy.yaml") + if _, err := os.Stat(policyPath); err != nil { + t.Skipf("Example policy file not found: %v", err) + } + + pol, err := LoadPolicy(policyPath) + assert.NilError(t, err) + assert.Equal(t, pol.Version, "1.0") + assert.Assert(t, len(pol.Rules) > 0, 
"Expected at least one rule") + + config := newTestConfig() + fvn := newFilteredNetwork(t, config, pol) + assert.Assert(t, fvn != nil) + assert.Assert(t, fvn.VirtualNetwork() != nil) + + t.Logf("Successfully loaded example policy with %d rules", len(pol.Rules)) + for i, rule := range pol.Rules { + t.Logf(" Rule %d: %s (action=%s, priority=%d)", i+1, rule.Name, rule.Action, rule.Priority) + } +} + +func TestICMPFiltering(t *testing.T) { + pol := loadPolicyFromString(t, `version: "1.0" +rules: + - name: allow-icmp + action: allow + priority: 10 + egress: + protocols: [icmp] + - name: deny-all + action: deny + priority: 100`) + + assert.Equal(t, len(pol.Rules), 2) + assert.Equal(t, pol.Rules[0].Name, "allow-icmp") + assert.Equal(t, len(pol.Rules[0].Egress.Protocols), 1) + assert.Equal(t, pol.Rules[0].Egress.Protocols[0], "icmp") + + config := newTestConfig() + fvn := newFilteredNetwork(t, config, pol) + + table := fvn.stack.IPTables().GetTable(0, false) + assert.Assert(t, len(table.Rules) > 0, "Expected rules in filter table") +} + +func TestDNSTrackingIntegration(t *testing.T) { + pol := loadPolicyFromString(t, `version: "1.0" +rules: + - name: allow-github-api + action: allow + priority: 10 + egress: + protocols: [tcp] + domains: [api.github.com] + ports: ["443"] + - name: allow-wildcards + action: allow + priority: 20 + egress: + protocols: [tcp] + domains: ["*.example.com"] + ports: ["443"] + - name: deny-all + action: deny + priority: 100`) + + tracker := NewTracker() + tracker.AddRecord("api.github.com", []net.IP{net.ParseIP("140.82.121.6")}, 300*time.Second) + tracker.AddRecord("cdn.example.com", []net.IP{net.ParseIP("192.0.2.1")}, 300*time.Second) + tracker.AddRecord("api.example.com", []net.IP{net.ParseIP("192.0.2.2")}, 300*time.Second) + + ips := tracker.GetIPs("api.github.com") + assert.Equal(t, len(ips), 1) + assert.Equal(t, ips[0].String(), "140.82.121.6") + + wildcardIPs := tracker.GetIPsForPattern("*.example.com") + assert.Equal(t, len(wildcardIPs), 2) 
+ + config := newTestConfig() + ipv4Table, err := BuildFilterTable(pol, tracker, config.Subnet, config.GatewayIP, false) + assert.NilError(t, err) + assert.Assert(t, len(ipv4Table.Rules) > 0, "Expected IPv4 rules") + + ipv6Table, err := BuildFilterTable(pol, tracker, config.Subnet, config.GatewayIP, true) + assert.NilError(t, err) + assert.Assert(t, len(ipv6Table.Rules) > 0, "Expected IPv6 rules") +} + +func TestDNSTrackerExpiration(t *testing.T) { + tracker := NewTracker() + tracker.AddRecord("short-lived.com", []net.IP{net.ParseIP("192.0.2.1")}, 1*time.Millisecond) + + ips := tracker.GetIPs("short-lived.com") + assert.Equal(t, len(ips), 1, "Expected IP immediately") + + time.Sleep(10 * time.Millisecond) + + ips = tracker.GetIPs("short-lived.com") + assert.Equal(t, len(ips), 0, "Expected expiration") + + tracker.CleanExpired() + ips = tracker.GetIPs("short-lived.com") + assert.Equal(t, len(ips), 0, "Expected cleanup") +} + +func TestDNSTrackerSizeLimit(t *testing.T) { + tracker := NewTracker() + + const testLimit = 100 + for i := range testLimit { + tracker.AddRecord(fmt.Sprintf("expired%d.example.com", i), + []net.IP{net.ParseIP("192.0.2.1")}, 1*time.Millisecond) + } + + time.Sleep(10 * time.Millisecond) + tracker.AddRecord("new.example.com", []net.IP{net.ParseIP("192.0.2.2")}, 300*time.Second) + + ips := tracker.GetIPs("new.example.com") + assert.Equal(t, len(ips), 1, "Expected new record") + + tracker2 := NewTracker() + for i := range 50 { + tracker2.AddRecord(fmt.Sprintf("test%d.example.com", i), + []net.IP{net.ParseIP("192.0.2.1")}, time.Duration(i+1)*time.Second) + } + + ips = tracker2.GetIPs("test0.example.com") + assert.Equal(t, len(ips), 1, "Expected test0 to exist") +} + +func TestDNSWildcardPatternMatching(t *testing.T) { + tracker := NewTracker() + tracker.AddRecord("example.com", []net.IP{net.ParseIP("192.0.2.1")}, 300*time.Second) + tracker.AddRecord("api.example.com", []net.IP{net.ParseIP("192.0.2.2")}, 300*time.Second) + 
tracker.AddRecord("cdn.example.com", []net.IP{net.ParseIP("192.0.2.3")}, 300*time.Second) + tracker.AddRecord("api.test.example.com", []net.IP{net.ParseIP("192.0.2.4")}, 300*time.Second) + tracker.AddRecord("other.com", []net.IP{net.ParseIP("192.0.2.5")}, 300*time.Second) + + tests := []struct { + pattern string + want int + desc string + }{ + {"example.com", 1, "exact match"}, + {"*.example.com", 3, "wildcard subdomains"}, + {"*", 5, "star matches all"}, + {"nonexistent.com", 0, "non-existent"}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + ips := tracker.GetIPsForPattern(tt.pattern) + assert.Equal(t, len(ips), tt.want, "Pattern: %s", tt.pattern) + }) + } +} + +func TestIPv6ICMPFiltering(t *testing.T) { + pol := loadPolicyFromString(t, `version: "1.0" +rules: + - name: allow-icmp + action: allow + priority: 10 + egress: + protocols: [icmp] + - name: deny-all + action: deny + priority: 100`) + + tracker := NewTracker() + config := newTestConfig() + + ipv4Table, err := BuildFilterTable(pol, tracker, config.Subnet, config.GatewayIP, false) + assert.NilError(t, err) + assert.Assert(t, len(ipv4Table.Rules) > 0, "Expected IPv4 rules") + + ipv6Table, err := BuildFilterTable(pol, tracker, config.Subnet, config.GatewayIP, true) + assert.NilError(t, err) + assert.Assert(t, len(ipv6Table.Rules) > 0, "Expected IPv6 rules") +} + +func TestIPv6WithDomains(t *testing.T) { + pol := loadPolicyFromString(t, `version: "1.0" +rules: + - name: allow-ipv6-site + action: allow + priority: 10 + egress: + protocols: [tcp] + domains: [ipv6.example.com] + ports: ["443"] + - name: deny-all + action: deny + priority: 100`) + + tracker := NewTracker() + tracker.AddRecord("ipv6.example.com", []net.IP{ + net.ParseIP("2001:db8::1"), + net.ParseIP("2001:db8::2"), + }, 300*time.Second) + + config := newTestConfig() + ipv6Table, err := BuildFilterTable(pol, tracker, config.Subnet, config.GatewayIP, true) + assert.NilError(t, err) + assert.Assert(t, len(ipv6Table.Rules) > 
0, "Expected IPv6 rules") + + ips := tracker.GetIPs("ipv6.example.com") + assert.Equal(t, len(ips), 2, "Expected 2 IPv6 addresses") +} + +func TestDomainBasedDenyRules(t *testing.T) { + tracker := NewTracker() + subnet := "192.168.100.0/24" + gatewayIP := "192.168.100.2" + + // Seed Lima internal domains + err := tracker.SeedLimaInternalDomains(subnet, gatewayIP) + assert.NilError(t, err) + + // Test: deny rule with non-pre-seeded domain should fail + polDenyNonSeeded := loadPolicyFromString(t, `version: "1.0" +rules: + - name: deny-example + action: deny + priority: 10 + egress: + domains: [example.com]`) + + _, err = BuildFilterTable(polDenyNonSeeded, tracker, subnet, gatewayIP, false) + assert.Assert(t, err != nil, "Expected error for non-pre-seeded domain in deny rule") + assert.Assert(t, strings.Contains(err.Error(), "pre-seeded"), "Error should mention pre-seeded: %v", err) + + // Test: deny rule with pre-seeded domain should succeed + polDenySeeded := loadPolicyFromString(t, `version: "1.0" +rules: + - name: deny-host + action: deny + priority: 10 + egress: + domains: [host.lima.internal]`) + + table, err := BuildFilterTable(polDenySeeded, tracker, subnet, gatewayIP, false) + assert.NilError(t, err) + assert.Assert(t, len(table.Rules) > 0, "Expected rules in filter table") + + // Test: deny rule with both seeded and non-seeded domains should fail + polDenyMixed := loadPolicyFromString(t, `version: "1.0" +rules: + - name: deny-mixed + action: deny + priority: 10 + egress: + domains: [host.lima.internal, example.com]`) + + _, err = BuildFilterTable(polDenyMixed, tracker, subnet, gatewayIP, false) + assert.Assert(t, err != nil, "Expected error for mixed seeded/non-seeded domains") + assert.Assert(t, strings.Contains(err.Error(), "example.com"), "Error should mention the non-seeded domain") +} + +func TestLimaInternalDomains(t *testing.T) { + tracker := NewTracker() + subnet := "192.168.100.0/24" + gatewayIP := "192.168.100.2" + + err := 
tracker.SeedLimaInternalDomains(subnet, gatewayIP) + assert.NilError(t, err) + + subnetIPs := tracker.GetIPs("subnet.lima.internal") + assert.Assert(t, len(subnetIPs) > 0, "subnet.lima.internal should resolve") + + hostIPs := tracker.GetIPs("host.lima.internal") + assert.Equal(t, len(hostIPs), 1, "host.lima.internal should have one IP") + assert.Equal(t, hostIPs[0].String(), gatewayIP) + + pol := loadPolicyFromString(t, `version: "1.0" +rules: + - name: allow-lima-subnet + priority: 10 + action: allow + egress: + domains: [subnet.lima.internal] + protocols: [tcp]`) + + table, err := BuildFilterTable(pol, tracker, subnet, gatewayIP, false) + assert.NilError(t, err) + assert.Assert(t, len(table.Rules) > 0, "Expected rules in filter table") +} diff --git a/pkg/networks/usernet/filter/forwarder.go b/pkg/networks/usernet/filter/forwarder.go new file mode 100644 index 00000000000..3f55a279967 --- /dev/null +++ b/pkg/networks/usernet/filter/forwarder.go @@ -0,0 +1,358 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "context" + "fmt" + "net" + "sync" + "time" + + "github.com/containers/gvisor-tap-vsock/pkg/services/forwarder" + "github.com/containers/gvisor-tap-vsock/pkg/tcpproxy" + "github.com/sirupsen/logrus" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" +) + +const linkLocalSubnet = "169.254.0.0/16" + +// FilterContext holds pre-parsed values for efficient filtering +// These values are computed once at forwarder creation time to avoid +// repeated string parsing in the hot path (called for every packet). 
+type FilterContext struct { + gatewayOne net.IP // Pre-calculated .1 IP from subnet (allows all TCP/UDP) + limaGateway net.IP // Pre-parsed Lima gateway IP (allows only UDP:53) + policy *Policy + tracker *Tracker +} + +// newFilterContext creates a filter context with pre-parsed values. +func newFilterContext(pol *Policy, tracker *Tracker, localSubnet, gatewayIP string) *FilterContext { + ctx := &FilterContext{ + policy: pol, + tracker: tracker, + } + + // Pre-parse .1 from subnet + if localSubnet != "" { + _, parsedSubnet, err := net.ParseCIDR(localSubnet) + if err == nil { + networkIP := parsedSubnet.IP.To4() + if networkIP != nil { + ctx.gatewayOne = net.IPv4(networkIP[0], networkIP[1], networkIP[2], 1) + } + } + } + + // Pre-parse Lima gateway + if gatewayIP != "" { + ctx.limaGateway = net.ParseIP(gatewayIP) + } + + return ctx +} + +// FilteredTCPForwarder creates a TCP forwarder with policy filtering +// It checks destination IPs BEFORE NAT, allowing us to block direct IP access. +func FilteredTCPForwarder(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mutex, ec2MetadataAccess bool, pol *Policy, tracker *Tracker, localSubnet, gatewayIP string) *tcp.Forwarder { + // Pre-parse all gateway IPs once for performance + filterContext := newFilterContext(pol, tracker, localSubnet, gatewayIP) + return tcp.NewForwarder(s, 0, 10, func(r *tcp.ForwarderRequest) { + localAddress := r.ID().LocalAddress + localPort := r.ID().LocalPort + + // EC2 metadata check + if (!ec2MetadataAccess) && linkLocal().Contains(localAddress) { + r.Complete(true) + return + } + + // Convert to net.IP for policy checking + destIP := net.IP(localAddress.AsSlice()) + + // Check policy BEFORE NAT - this is where we can see the real destination! 
+ if !isDestinationAllowed(destIP, localPort, "tcp", filterContext) { + logrus.Infof("[egress-filter] Blocked TCP connection to %s:%d (policy violation)", destIP, localPort) + r.Complete(true) + return + } + + // Apply NAT translation + natLock.Lock() + if replaced, ok := nat[localAddress]; ok { + localAddress = replaced + } + natLock.Unlock() + + // Forward the connection + var d net.Dialer + outbound, err := d.DialContext(context.Background(), "tcp", net.JoinHostPort(localAddress.String(), fmt.Sprintf("%d", localPort))) + if err != nil { + logrus.Tracef("net.DialContext() = %v", err) + r.Complete(true) + return + } + + var wq waiter.Queue + ep, tcpErr := r.CreateEndpoint(&wq) + r.Complete(false) + if tcpErr != nil { + if _, ok := tcpErr.(*tcpip.ErrConnectionRefused); ok { + logrus.Debugf("r.CreateEndpoint() = %v", tcpErr) + } else { + logrus.Errorf("r.CreateEndpoint() = %v", tcpErr) + } + return + } + + remote := tcpproxy.DialProxy{ + DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { + return outbound, nil + }, + } + remote.HandleConn(gonet.NewTCPConn(&wq, ep)) + }) +} + +// FilteredUDPForwarder creates a UDP forwarder with policy filtering. 
+func FilteredUDPForwarder(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mutex, pol *Policy, tracker *Tracker, localSubnet, gatewayIP string) *udp.Forwarder { + // Pre-parse all gateway IPs once for performance + filterContext := newFilterContext(pol, tracker, localSubnet, gatewayIP) + return udp.NewForwarder(s, func(r *udp.ForwarderRequest) { + localAddress := r.ID().LocalAddress + localPort := r.ID().LocalPort + + // Skip DNS - it's handled by gvisor's internal DNS server + // DNS queries should not be forwarded via net.Dial + if localPort == 53 { + // Don't handle this - let it fall through to gvisor's DNS handling + // which is NOT handled by forwarders + return + } + + // Link-local and broadcast check + if linkLocal().Contains(localAddress) || localAddress == header.IPv4Broadcast { + return + } + + // Convert to net.IP for policy checking + destIP := net.IP(localAddress.AsSlice()) + + // Check policy BEFORE NAT + if !isDestinationAllowed(destIP, localPort, "udp", filterContext) { + logrus.Infof("[egress-filter] Blocked UDP connection to %s:%d (policy violation)", destIP, localPort) + return + } + + // Apply NAT translation + natLock.Lock() + if replaced, ok := nat[localAddress]; ok { + localAddress = replaced + } + natLock.Unlock() + + var wq waiter.Queue + ep, tcpErr := r.CreateEndpoint(&wq) + if tcpErr != nil { + if _, ok := tcpErr.(*tcpip.ErrConnectionRefused); ok { + logrus.Debugf("r.CreateEndpoint() = %v", tcpErr) + } else { + logrus.Errorf("r.CreateEndpoint() = %v", tcpErr) + } + return + } + + // Use the UDP proxy from gvisor-tap-vsock + p, _ := forwarder.NewUDPProxy(&udpConnAdapter{underlying: gonet.NewUDPConn(&wq, ep)}, func() (net.Conn, error) { + var d net.Dialer + return d.DialContext(context.Background(), "udp", net.JoinHostPort(localAddress.String(), fmt.Sprintf("%d", localPort))) + }) + go func() { + p.Run() + ep.Close() + }() + }) +} + +// isDestinationAllowed checks if a destination IP:port:protocol is allowed by policy 
+// This function is called for EVERY TCP/UDP packet, so it's highly optimized: +// 1. Fast-path checks using byte comparisons (no parsing) +// 2. Pre-parsed gateway IPs from FilterContext (no string operations) +// 3. Early returns to minimize work. +func isDestinationAllowed(destIP net.IP, destPort uint16, protocol string, filterContext *FilterContext) bool { + // Fast-path: IPv4 gateway checks using byte-level comparisons + // This avoids the overhead of net.IP.Equal() for the most common cases + // We normalize to 4-byte representation for consistent comparison + ip4 := destIP.To4() + if ip4 != nil { + // Check if destIP matches pre-calculated .1 gateway + if filterContext.gatewayOne != nil { + gw1 := filterContext.gatewayOne.To4() + if gw1 != nil && + ip4[0] == gw1[0] && ip4[1] == gw1[1] && + ip4[2] == gw1[2] && ip4[3] == gw1[3] { + // Allow all TCP and UDP to .1 + if protocol == "tcp" || protocol == "udp" { + return true + } + } + } + + // Check if destIP matches Lima gateway (.2) for DNS + if filterContext.limaGateway != nil { + gw2 := filterContext.limaGateway.To4() + if gw2 != nil && + ip4[0] == gw2[0] && ip4[1] == gw2[1] && + ip4[2] == gw2[2] && ip4[3] == gw2[3] { + // Only allow UDP port 53 to Lima gateway + if protocol == "udp" && destPort == 53 { + return true + } + } + } + } + + // Also allow loopback and link-local for basic connectivity + if destIP.IsLoopback() || destIP.IsLinkLocalUnicast() { + return true + } + + // Check each policy rule in order + for _, rule := range filterContext.policy.Rules { + if ruleMatches(rule, destIP, destPort, protocol, filterContext.tracker) { + if rule.IsAllowRule() { + return true + } + // Deny rule matched + return false + } + } + + // No rule matched - default deny + return false +} + +// ruleMatches checks if a policy rule matches the given destination. 
+func ruleMatches(rule PolicyRule, destIP net.IP, destPort uint16, protocol string, tracker *Tracker) bool { + if rule.MatchesAll() { + return true + } + + if rule.Egress == nil { + return false + } + + // Check protocol + if len(rule.Egress.Protocols) > 0 { + protocolMatches := false + for _, p := range rule.Egress.Protocols { + if p == protocol { + protocolMatches = true + break + } + } + if !protocolMatches { + return false + } + } + + // Check port + if len(rule.Egress.Ports) > 0 { + portMatches := false + for _, portStr := range rule.Egress.Ports { + start, end, err := parsePortRange(portStr) + if err != nil { + continue + } + if destPort >= start && destPort <= end { + portMatches = true + break + } + } + if !portMatches { + return false + } + } + + // Check IP ranges + if len(rule.Egress.IPs) > 0 { + ipMatches := false + for _, ipStr := range rule.Egress.IPs { + _, ipNet, err := net.ParseCIDR(ipStr) + if err != nil { + // Try as single IP + if parsedIP := net.ParseIP(ipStr); parsedIP != nil { + if parsedIP.Equal(destIP) { + ipMatches = true + break + } + } + continue + } + if ipNet.Contains(destIP) { + ipMatches = true + break + } + } + if ipMatches { + return true // IP matched, rule applies + } + } + + // Check domain patterns (via DNS tracker) + if len(rule.Egress.Domains) > 0 { + domains := tracker.GetDomainsForIP(destIP) + for _, domain := range domains { + for _, pattern := range rule.Egress.Domains { + if matchesPattern(domain, pattern) { + return true // Domain matched, rule applies + } + } + } + } + + // If rule has IPs or domains but none matched, rule doesn't apply + if len(rule.Egress.IPs) > 0 || len(rule.Egress.Domains) > 0 { + return false + } + + // Rule has protocol/port restrictions but no IP/domain restrictions + // If we got here, protocol and port matched + return true +} + +func linkLocal() *tcpip.Subnet { + _, parsedSubnet, _ := net.ParseCIDR(linkLocalSubnet) + subnet, _ := tcpip.NewSubnet(tcpip.AddrFromSlice(parsedSubnet.IP), 
tcpip.MaskFromBytes(parsedSubnet.Mask)) + return &subnet +} + +// udpConnAdapter wraps gonet.UDPConn to satisfy the udpConn interface needed by forwarder.NewUDPProxy +// Unfortunately the forwarder package doesn't export its udpConn and autoStoppingListener types. +type udpConnAdapter struct { + underlying *gonet.UDPConn +} + +func (u *udpConnAdapter) ReadFrom(b []byte) (int, net.Addr, error) { + return u.underlying.ReadFrom(b) +} + +func (u *udpConnAdapter) WriteTo(b []byte, addr net.Addr) (int, error) { + return u.underlying.WriteTo(b, addr) +} + +func (u *udpConnAdapter) SetReadDeadline(t time.Time) error { + return u.underlying.SetReadDeadline(t) +} + +func (u *udpConnAdapter) Close() error { + return u.underlying.Close() +} diff --git a/pkg/networks/usernet/filter/policy.go b/pkg/networks/usernet/filter/policy.go new file mode 100644 index 00000000000..094248c3142 --- /dev/null +++ b/pkg/networks/usernet/filter/policy.go @@ -0,0 +1,229 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "encoding/json" + "errors" + "fmt" + "net" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + + "gopkg.in/yaml.v3" +) + +// Policy represents the complete network policy. +type Policy struct { + Version string `yaml:"version" json:"version"` + Rules []PolicyRule `yaml:"rules" json:"rules"` +} + +// PolicyRule represents a single filtering rule. +type PolicyRule struct { + Name string `yaml:"name" json:"name"` + Action string `yaml:"action" json:"action"` // "allow" or "deny" + Priority int `yaml:"priority" json:"priority"` + Egress *PolicyMatch `yaml:"egress,omitempty" json:"egress,omitempty"` // nil = match all traffic +} + +// PolicyMatch specifies what traffic the rule matches. 
+type PolicyMatch struct { + Protocols []string `yaml:"protocols,omitempty" json:"protocols,omitempty"` // tcp, udp, icmp + Domains []string `yaml:"domains,omitempty" json:"domains,omitempty"` // supports wildcards (*.example.com) + IPs []string `yaml:"ips,omitempty" json:"ips,omitempty"` // supports CIDR notation + Ports []string `yaml:"ports,omitempty" json:"ports,omitempty"` // single ports or ranges (8000-9000) +} + +// IsAllowRule returns true if the rule is an allow rule. +func (r *PolicyRule) IsAllowRule() bool { + return r.Action == "allow" +} + +// IsDenyRule returns true if the rule is a deny rule. +func (r *PolicyRule) IsDenyRule() bool { + return r.Action == "deny" +} + +// MatchesAll returns true if the rule matches all traffic (egress is nil or all fields empty). +func (r *PolicyRule) MatchesAll() bool { + if r.Egress == nil { + return true + } + return len(r.Egress.Protocols) == 0 && + len(r.Egress.Domains) == 0 && + len(r.Egress.IPs) == 0 && + len(r.Egress.Ports) == 0 +} + +// LoadPolicy loads and parses an egress policy from a YAML or JSON file. +// Automatically detects format based on file extension. 
+func LoadPolicy(path string) (*Policy, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read policy file: %w", err) + } + + var policy Policy + ext := strings.ToLower(filepath.Ext(path)) + + switch ext { + case ".json": + if err := json.Unmarshal(data, &policy); err != nil { + return nil, fmt.Errorf("failed to parse policy JSON: %w", err) + } + case ".yaml", ".yml": + if err := yaml.Unmarshal(data, &policy); err != nil { + return nil, fmt.Errorf("failed to parse policy YAML: %w", err) + } + default: + // Try YAML first, then JSON + if err := yaml.Unmarshal(data, &policy); err != nil { + if jsonErr := json.Unmarshal(data, &policy); jsonErr != nil { + return nil, fmt.Errorf("failed to parse policy as YAML: %w, or JSON: %w", err, jsonErr) + } + } + } + + if err := validatePolicy(&policy); err != nil { + return nil, fmt.Errorf("invalid policy: %w", err) + } + + // Sort rules by priority (lower number = higher priority) + sort.Slice(policy.Rules, func(i, j int) bool { + return policy.Rules[i].Priority < policy.Rules[j].Priority + }) + + return &policy, nil +} + +// SavePolicyJSON saves a policy to a JSON file with nice formatting. +func SavePolicyJSON(policy *Policy, path string) error { + data, err := json.MarshalIndent(policy, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal policy to JSON: %w", err) + } + + if err := os.WriteFile(path, data, 0o644); err != nil { + return fmt.Errorf("failed to write policy file: %w", err) + } + + return nil +} + +// validatePolicy checks if the policy is valid. 
+func validatePolicy(policy *Policy) error { + if policy.Version == "" { + return errors.New("version field is required") + } + + // Only version 1.0 is currently supported + if policy.Version != "1.0" { + return fmt.Errorf("unsupported policy version: %s (only '1.0' is supported)", policy.Version) + } + + if len(policy.Rules) == 0 { + return errors.New("at least one rule is required") + } + + ruleNames := make(map[string]bool) + for i, rule := range policy.Rules { + // Check for unique rule names + if rule.Name == "" { + return fmt.Errorf("rule at index %d: name is required", i) + } + if ruleNames[rule.Name] { + return fmt.Errorf("duplicate rule name: %s", rule.Name) + } + ruleNames[rule.Name] = true + + // Validate action + if rule.Action != "allow" && rule.Action != "deny" { + return fmt.Errorf("rule '%s': action must be 'allow' or 'deny', got '%s'", rule.Name, rule.Action) + } + + // Validate egress match criteria + // Note: Domain-based deny rule validation deferred to BuildFilterTable (needs DNS tracker) + if rule.Egress != nil { + // Validate protocols + for _, proto := range rule.Egress.Protocols { + if proto != "tcp" && proto != "udp" && proto != "icmp" { + return fmt.Errorf("rule '%s': invalid protocol '%s' (must be tcp, udp, or icmp)", rule.Name, proto) + } + } + + // Validate ports + for _, portStr := range rule.Egress.Ports { + if err := validatePortRange(portStr); err != nil { + return fmt.Errorf("rule '%s': invalid port specification '%s': %w", rule.Name, portStr, err) + } + } + + // Validate IPs/CIDRs + for _, ipStr := range rule.Egress.IPs { + if err := validateIPOrCIDR(ipStr); err != nil { + return fmt.Errorf("rule '%s': invalid IP or CIDR '%s': %w", rule.Name, ipStr, err) + } + } + } + } + + return nil +} + +// validatePortRange validates a port string (single port or range). 
// A string without "-" must be a decimal port in 1-65535. A string with "-"
// must split into exactly two decimal ports, each in 1-65535, with the first
// not greater than the second. The first violation found is returned.
func validatePortRange(portStr string) error {
	outOfRange := func(p int) bool { return p < 1 || p > 65535 }

	// Single port.
	if !strings.Contains(portStr, "-") {
		port, err := strconv.Atoi(portStr)
		switch {
		case err != nil:
			return fmt.Errorf("invalid port: %w", err)
		case outOfRange(port):
			return fmt.Errorf("port %d out of range (1-65535)", port)
		}
		return nil
	}

	// Range "start-end".
	bounds := strings.Split(portStr, "-")
	if len(bounds) != 2 {
		return errors.New("invalid port range format")
	}
	start, err := strconv.Atoi(bounds[0])
	if err != nil {
		return fmt.Errorf("invalid start port: %w", err)
	}
	end, err := strconv.Atoi(bounds[1])
	if err != nil {
		return fmt.Errorf("invalid end port: %w", err)
	}

	switch {
	case outOfRange(start):
		return fmt.Errorf("start port %d out of range (1-65535)", start)
	case outOfRange(end):
		return fmt.Errorf("end port %d out of range (1-65535)", end)
	case start > end:
		return fmt.Errorf("start port %d greater than end port %d", start, end)
	}
	return nil
}

// validateIPOrCIDR validates an IP address or CIDR notation.
// It returns nil when ipStr parses as a CIDR or, failing that, as a plain IP
// address, and an error otherwise.
func validateIPOrCIDR(ipStr string) error {
	if _, _, err := net.ParseCIDR(ipStr); err == nil {
		return nil
	}
	if net.ParseIP(ipStr) == nil {
		return errors.New("not a valid IP address or CIDR notation")
	}
	return nil
}
+# If no rules match, traffic is denied by default. +# +# See: https://lima-vm.io/docs/config/network/policy/ + +version: "1.0" +rules: +# Block access to cloud provider metadata services (AWS/Azure/GCP IMDSv1) +- name: deny-cloud-metadata + action: deny + priority: 10 + egress: + ips: + - 169.254.169.254/32 + - fd00:ec2::254/128 + +# Prevent VM from accessing the host machine +- name: deny-lima-host + action: deny + priority: 10 + egress: + domains: + - host.lima.internal + +# Allow VM-to-VM communication within the Lima network +- name: allow-lima-subnet + action: allow + priority: 20 + egress: + domains: + - subnet.lima.internal + +# Block RFC 1918 private networks to isolate VM from host's local networks +- name: deny-private-subnets + action: deny + priority: 30 + egress: + ips: + - 10.0.0.0/8 + - 172.16.0.0/12 + - 192.168.0.0/16 + +# Allow DNS queries (UDP) and zone transfers (TCP) +- name: allow-dns + action: allow + priority: 50 + egress: + protocols: + - tcp + - udp + ports: + - "53" + +# Allow common outbound services (SSH, HTTP, HTTPS) +- name: allow-http-https-ssh + action: allow + priority: 50 + egress: + protocols: + - tcp + ports: + - "22" + - "80" + - "443" diff --git a/pkg/networks/usernet/filter/rules.go b/pkg/networks/usernet/filter/rules.go new file mode 100644 index 00000000000..6386589d41f --- /dev/null +++ b/pkg/networks/usernet/filter/rules.go @@ -0,0 +1,670 @@ +// SPDX-FileCopyrightText: Copyright The Lima Authors +// SPDX-License-Identifier: Apache-2.0 + +package filter + +import ( + "fmt" + "net" + "strconv" + "strings" + + "github.com/sirupsen/logrus" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +// BuildFilterTable creates a filter table from a policy +// The table will have OUTPUT chain rules for policy filtering +// INPUT and FORWARD chains accept all traffic since we only filter egress +// localSubnet is used to calculate .1 (network gateway) which allows all 
TCP/UDP traffic +// gatewayIP is the Lima gateway which only allows UDP port 53. +func BuildFilterTable(pol *Policy, dnsTracker *Tracker, localSubnet, gatewayIP string, ipv6 bool) (stack.Table, error) { + var rules []stack.Rule + var networkProto tcpip.NetworkProtocolNumber + if ipv6 { + networkProto = header.IPv6ProtocolNumber + } else { + networkProto = header.IPv4ProtocolNumber + } + + // Rule 0: Accept all INPUT traffic (we only filter OUTPUT/egress) + inputAcceptIndex := 0 + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Target: &stack.AcceptTarget{NetworkProtocol: networkProto}, + }) + + // Rule 1: Accept all FORWARD traffic (we only filter OUTPUT/egress) + forwardAcceptIndex := 1 + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Target: &stack.AcceptTarget{NetworkProtocol: networkProto}, + }) + + // OUTPUT chain starts here + outputChainStart := len(rules) + + // Add DNS snooper as first OUTPUT rule + // This tracks DNS responses for FQDN-based connection filtering + // Note: dnsSnooper never returns true, so packets are never dropped here + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Matchers: []stack.Matcher{&dnsSnooper{tracker: dnsTracker}}, + Target: &stack.DropTarget{NetworkProtocol: networkProto}, + }) + + // Built-in rule: Always allow DHCP for IPv4 + // DHCP is essential for VM boot - without it, the VM cannot get an IP address + if !ipv6 { + _, broadcastNet, _ := net.ParseCIDR("255.255.255.255/32") + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Matchers: []stack.Matcher{ + &protocolMatcher{protocol: uint8(header.UDPProtocolNumber)}, + &portMatcher{startPort: 67, endPort: 68}, + &ipMatcher{networks: []*net.IPNet{broadcastNet}}, + }, + Target: &stack.AcceptTarget{NetworkProtocol: networkProto}, + }) + } + + // Built-in rule: Allow all TCP and UDP to .1 (network gateway) + // .1 is the network gateway and needs unrestricted access + if localSubnet != "" { + _, parsedSubnet, err 
:= net.ParseCIDR(localSubnet) + if err == nil { + // Calculate .1 IP + networkIP := parsedSubnet.IP.To4() + if networkIP != nil && !ipv6 { + gatewayOne := net.IPv4(networkIP[0], networkIP[1], networkIP[2], 1) + gatewayOneNet := &net.IPNet{IP: gatewayOne, Mask: net.CIDRMask(32, 32)} + + // Allow all TCP to .1 + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Matchers: []stack.Matcher{ + &protocolMatcher{protocol: uint8(header.TCPProtocolNumber)}, + &ipMatcher{networks: []*net.IPNet{gatewayOneNet}}, + }, + Target: &stack.AcceptTarget{NetworkProtocol: networkProto}, + }) + + // Allow all UDP to .1 + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Matchers: []stack.Matcher{ + &protocolMatcher{protocol: uint8(header.UDPProtocolNumber)}, + &ipMatcher{networks: []*net.IPNet{gatewayOneNet}}, + }, + Target: &stack.AcceptTarget{NetworkProtocol: networkProto}, + }) + } + } + } + + // Built-in rule: Allow UDP port 53 to Lima gateway (from config.GatewayIP, typically .2) + // DNS is handled by gvisor internally, not via forwarder + if gatewayIP != "" { + gateway := net.ParseIP(gatewayIP) + if gateway != nil { + // Only add the rule if the gateway IP version matches + isIPv6Gateway := gateway.To4() == nil + if isIPv6Gateway == ipv6 { + // Create /32 (or /128 for IPv6) network for gateway + var gatewayNet *net.IPNet + if !ipv6 { + gatewayNet = &net.IPNet{IP: gateway, Mask: net.CIDRMask(32, 32)} + } else { + gatewayNet = &net.IPNet{IP: gateway, Mask: net.CIDRMask(128, 128)} + } + + // Allow UDP port 53 to Lima gateway + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Matchers: []stack.Matcher{ + &protocolMatcher{protocol: uint8(header.UDPProtocolNumber)}, + &portMatcher{startPort: 53, endPort: 53}, + &ipMatcher{networks: []*net.IPNet{gatewayNet}}, + }, + Target: &stack.AcceptTarget{NetworkProtocol: networkProto}, + }) + } + } + } + + // Build rules from policy (sorted by priority) + for _, policyRule := range pol.Rules { + 
stackRules, err := buildRulesFromPolicy(policyRule, dnsTracker, networkProto) + if err != nil { + return stack.Table{}, fmt.Errorf("failed to build rule '%s': %w", policyRule.Name, err) + } + rules = append(rules, stackRules...) + } + + // Add DNS-resolved-only check before final DROP + // This prevents IP-based escape where users bypass domain filtering by using direct IPs + // Only blocks IPs that weren't learned from DNS (i.e., direct IP access) + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Matchers: []stack.Matcher{&dnsResolvedOnlyMatcher{tracker: dnsTracker}}, + Target: &stack.DropTarget{NetworkProtocol: networkProto}, + }) + + // ICMP filtering note: + // Due to gvisor's NAT architecture, we cannot see real ICMP destinations at the OUTPUT chain. + // All ICMP packets appear to go to the gateway IP (192.168.7.1), making destination-based + // filtering impossible. ICMP filtering must be done via policy rules (allow/deny all) rather + // than by destination IP. The policy rules above handle ICMP based on protocol matching. 
+ + // Add default DROP rule at the end (default deny policy for OUTPUT) + outputDefaultDropIndex := len(rules) + rules = append(rules, stack.Rule{ + Filter: emptyFilter(ipv6), + Target: &stack.DropTarget{NetworkProtocol: networkProto}, + }) + + // Build the table with proper chain configuration + table := stack.Table{ + Rules: rules, + BuiltinChains: [stack.NumHooks]int{ + stack.Prerouting: stack.HookUnset, + stack.Input: inputAcceptIndex, // INPUT chain: accept all + stack.Forward: forwardAcceptIndex, // FORWARD chain: accept all + stack.Output: outputChainStart, // OUTPUT chain: policy filtering + stack.Postrouting: stack.HookUnset, + }, + Underflows: [stack.NumHooks]int{ + stack.Prerouting: stack.HookUnset, + stack.Input: inputAcceptIndex, // Default for INPUT: accept + stack.Forward: forwardAcceptIndex, // Default for FORWARD: accept + stack.Output: outputDefaultDropIndex, // Default for OUTPUT: drop + stack.Postrouting: stack.HookUnset, + }, + } + + return table, nil +} + +// buildRulesFromPolicy converts a single policy rule into one or more stack.Rule +// Multiple rules are created when the policy specifies multiple protocols or ports, +// implementing OR logic (any rule can match) rather than AND logic (all must match). 
+func buildRulesFromPolicy(policyRule PolicyRule, dnsTracker *Tracker, networkProto tcpip.NetworkProtocolNumber) ([]stack.Rule, error) { + var rules []stack.Rule + + // Determine the target based on action + var target stack.Target + if policyRule.IsAllowRule() { + target = &stack.AcceptTarget{NetworkProtocol: networkProto} + } else { + target = &stack.DropTarget{NetworkProtocol: networkProto} + } + + // If the rule matches all traffic, create a simple rule + if policyRule.MatchesAll() { + rules = append(rules, stack.Rule{ + Filter: emptyFilter(networkProto == header.IPv6ProtocolNumber), + Target: target, + }) + return rules, nil + } + + // Validate domain-based deny rules + if policyRule.IsDenyRule() && policyRule.Egress != nil && len(policyRule.Egress.Domains) > 0 { + for _, domain := range policyRule.Egress.Domains { + if !dnsTracker.IsPreSeeded(domain) { + return nil, fmt.Errorf("rule '%s': domain '%s' cannot be used in deny rule - only pre-seeded domains work (currently: host.lima.internal, subnet.lima.internal)", policyRule.Name, domain) + } + } + } + + // Collect IP networks from static IPs only + ipNetworks, err := collectIPNetworks(policyRule.Egress) + if err != nil { + return nil, err + } + + // Check if we have domain-based rules + hasDomains := len(policyRule.Egress.Domains) > 0 + + // Get protocols and ports to create combinations + protocols := policyRule.Egress.Protocols + ports := policyRule.Egress.Ports + + // Convert protocol names to numbers + var protoNums []uint8 + if len(protocols) > 0 { + for _, proto := range protocols { + var protoNum uint8 + switch proto { + case "tcp": + protoNum = uint8(header.TCPProtocolNumber) + case "udp": + protoNum = uint8(header.UDPProtocolNumber) + case "icmp": + // Use ICMPv6 for IPv6, ICMPv4 for IPv4 + if networkProto == header.IPv6ProtocolNumber { + protoNum = uint8(header.ICMPv6ProtocolNumber) + } else { + protoNum = uint8(header.ICMPv4ProtocolNumber) + } + default: + return nil, fmt.Errorf("unsupported 
protocol: %s", proto) + } + protoNums = append(protoNums, protoNum) + } + } + + // Parse port ranges + type portRange struct { + start uint16 + end uint16 + } + var portRanges []portRange + if len(ports) > 0 { + for _, portStr := range ports { + start, end, err := parsePortRange(portStr) + if err != nil { + return nil, fmt.Errorf("invalid port range '%s': %w", portStr, err) + } + portRanges = append(portRanges, portRange{start: start, end: end}) + } + } + + // Helper function to create matchers list with IP/domain filtering + createMatchersWithDestination := func(baseMatchers []stack.Matcher) [][]stack.Matcher { + var matcherSets [][]stack.Matcher + + // If we have static IPs, create a matcher set with IP matcher + if len(ipNetworks) > 0 { + m := make([]stack.Matcher, len(baseMatchers)+1) + m[0] = &ipMatcher{networks: ipNetworks} + copy(m[1:], baseMatchers) + matcherSets = append(matcherSets, m) + } + + // If we have domains, create a matcher set with domain matcher + if hasDomains { + m := make([]stack.Matcher, len(baseMatchers)+1) + m[0] = &domainMatcher{tracker: dnsTracker, patterns: policyRule.Egress.Domains} + copy(m[1:], baseMatchers) + matcherSets = append(matcherSets, m) + } + + // If we have neither IPs nor domains, just return base matchers + if len(matcherSets) == 0 { + matcherSets = append(matcherSets, baseMatchers) + } + + return matcherSets + } + + // Create rules for all combinations of protocols and ports + // This implements OR logic: any combination can match + hasProtos := len(protoNums) > 0 + hasPorts := len(portRanges) > 0 + + switch { + case hasProtos && hasPorts: + // Create one rule per protocol+port combination + for _, protoNum := range protoNums { + for _, pr := range portRanges { + baseMatchers := []stack.Matcher{ + &protocolMatcher{protocol: protoNum}, + &portMatcher{startPort: pr.start, endPort: pr.end}, + } + for _, matchers := range createMatchersWithDestination(baseMatchers) { + rules = append(rules, stack.Rule{ + Filter: 
emptyFilter(networkProto == header.IPv6ProtocolNumber), + Matchers: matchers, + Target: target, + }) + } + } + } + case hasProtos: + // Create one rule per protocol (no port matching) + for _, protoNum := range protoNums { + baseMatchers := []stack.Matcher{&protocolMatcher{protocol: protoNum}} + for _, matchers := range createMatchersWithDestination(baseMatchers) { + rules = append(rules, stack.Rule{ + Filter: emptyFilter(networkProto == header.IPv6ProtocolNumber), + Matchers: matchers, + Target: target, + }) + } + } + case hasPorts: + // Create one rule per port (no protocol matching - matches TCP and UDP) + for _, pr := range portRanges { + baseMatchers := []stack.Matcher{&portMatcher{startPort: pr.start, endPort: pr.end}} + for _, matchers := range createMatchersWithDestination(baseMatchers) { + rules = append(rules, stack.Rule{ + Filter: emptyFilter(networkProto == header.IPv6ProtocolNumber), + Matchers: matchers, + Target: target, + }) + } + } + default: + // No protocol or port matching, only IP/domain filtering + for _, matchers := range createMatchersWithDestination(nil) { + rules = append(rules, stack.Rule{ + Filter: emptyFilter(networkProto == header.IPv6ProtocolNumber), + Matchers: matchers, + Target: target, + }) + } + } + + return rules, nil +} + +// collectIPNetworks collects IP networks from static IPs only (not domains) +// Domain filtering is handled separately via domainMatcher. 
+func collectIPNetworks(match *PolicyMatch) ([]*net.IPNet, error) { + var ipNetworks []*net.IPNet + + // Collect IP networks from IPs + if len(match.IPs) > 0 { + for _, ipStr := range match.IPs { + _, ipNet, err := net.ParseCIDR(ipStr) + if err != nil { + // Try parsing as a single IP + ip := net.ParseIP(ipStr) + if ip == nil { + return nil, fmt.Errorf("invalid IP or CIDR: %s", ipStr) + } + // Convert single IP to /32 or /128 CIDR + if ip.To4() != nil { + ipNet = &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)} + } else { + ipNet = &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)} + } + } + ipNetworks = append(ipNetworks, ipNet) + } + } + + return ipNetworks, nil +} + +// emptyFilter returns an empty IP header filter for the given IP version. +func emptyFilter(ipv6 bool) stack.IPHeaderFilter { + if ipv6 { + return stack.EmptyFilter6() + } + return stack.EmptyFilter4() +} + +// protocolMatcher matches TCP, UDP, or ICMP protocols. +type protocolMatcher struct { + protocol uint8 +} + +func (*protocolMatcher) Name() string { + return "protocolMatcher" +} + +func (m *protocolMatcher) Match(_ stack.Hook, pkt *stack.PacketBuffer, _, _ string) (matches, hotdrop bool) { + // Get the network protocol from the packet + netProto := pkt.NetworkProtocolNumber + if netProto != header.IPv4ProtocolNumber && netProto != header.IPv6ProtocolNumber { + return false, false + } + + // Get transport protocol from the network header + var transportProto uint8 + if netProto == header.IPv4ProtocolNumber { + ipv4 := header.IPv4(pkt.NetworkHeader().Slice()) + if len(ipv4) < header.IPv4MinimumSize { + return false, true // malformed, hotdrop + } + transportProto = ipv4.Protocol() + } else { // IPv6 + ipv6 := header.IPv6(pkt.NetworkHeader().Slice()) + if len(ipv6) < header.IPv6MinimumSize { + return false, true // malformed, hotdrop + } + transportProto = uint8(ipv6.TransportProtocol()) + } + + return transportProto == m.protocol, false +} + +// portMatcher matches destination ports (single port or 
range). +type portMatcher struct { + startPort uint16 + endPort uint16 +} + +func (*portMatcher) Name() string { + return "portMatcher" +} + +func (m *portMatcher) Match(_ stack.Hook, pkt *stack.PacketBuffer, _, _ string) (matches, hotdrop bool) { + transportHeader := pkt.TransportHeader().Slice() + if len(transportHeader) < 4 { + return false, false + } + + // Try TCP first + if len(transportHeader) >= header.TCPMinimumSize { + tcp := header.TCP(transportHeader) + dstPort := tcp.DestinationPort() + return dstPort >= m.startPort && dstPort <= m.endPort, false + } + + // Try UDP + if len(transportHeader) >= header.UDPMinimumSize { + udp := header.UDP(transportHeader) + dstPort := udp.DestinationPort() + return dstPort >= m.startPort && dstPort <= m.endPort, false + } + + return false, false +} + +// ipMatcher matches destination IP addresses or CIDR ranges. +type ipMatcher struct { + networks []*net.IPNet +} + +func (*ipMatcher) Name() string { + return "ipMatcher" +} + +func (m *ipMatcher) Match(_ stack.Hook, pkt *stack.PacketBuffer, _, _ string) (matches, hotdrop bool) { + netProto := pkt.NetworkProtocolNumber + var dstIP net.IP + + switch netProto { + case header.IPv4ProtocolNumber: + ipv4 := header.IPv4(pkt.NetworkHeader().Slice()) + if len(ipv4) < header.IPv4MinimumSize { + return false, true // malformed, hotdrop + } + // Get destination IP from header + dstIP = net.IP(ipv4.DestinationAddressSlice()) + case header.IPv6ProtocolNumber: + ipv6 := header.IPv6(pkt.NetworkHeader().Slice()) + if len(ipv6) < header.IPv6MinimumSize { + return false, true // malformed, hotdrop + } + // Get destination IP from header + dstIP = net.IP(ipv6.DestinationAddressSlice()) + default: + return false, false + } + + // Check if the destination IP matches any of our networks + for _, network := range m.networks { + if network.Contains(dstIP) { + return true, false + } + } + + return false, false +} + +// domainMatcher matches packets based on domain patterns using DNS tracking +// 
It dynamically checks the DNS tracker to see if the destination IP belongs to an allowed domain. +type domainMatcher struct { + tracker *Tracker + patterns []string // domain patterns like "github.com" or "*.github.com" +} + +func (*domainMatcher) Name() string { + return "domainMatcher" +} + +func (m *domainMatcher) Match(_ stack.Hook, pkt *stack.PacketBuffer, _, _ string) (matches, hotdrop bool) { + netProto := pkt.NetworkProtocolNumber + var dstIP net.IP + + switch netProto { + case header.IPv4ProtocolNumber: + ipv4 := header.IPv4(pkt.NetworkHeader().Slice()) + if len(ipv4) < header.IPv4MinimumSize { + return false, true // malformed, hotdrop + } + dstIP = net.IP(ipv4.DestinationAddressSlice()) + case header.IPv6ProtocolNumber: + ipv6 := header.IPv6(pkt.NetworkHeader().Slice()) + if len(ipv6) < header.IPv6MinimumSize { + return false, true // malformed, hotdrop + } + dstIP = net.IP(ipv6.DestinationAddressSlice()) + default: + return false, false + } + + // Look up which domains resolve to this IP + domains := m.tracker.GetDomainsForIP(dstIP) + + // Check if any of the domains match our patterns + for _, domain := range domains { + for _, pattern := range m.patterns { + if matchDomainPattern(domain, pattern) { + return true, false + } + } + } + + return false, false +} + +// matchDomainPattern wraps matchesPattern for use in domainMatcher. +func matchDomainPattern(domain, pattern string) bool { + return matchesPattern(domain, pattern) +} + +// dnsResolvedOnlyMatcher blocks traffic to IPs that weren't learned from DNS +// This prevents IP-based escape where users bypass domain filtering by using direct IPs. +type dnsResolvedOnlyMatcher struct { + tracker *Tracker +} + +func (*dnsResolvedOnlyMatcher) Name() string { + return "dnsResolvedOnly" +} + +// isPrivateOrLocalIP checks if an IP is private, localhost, or link-local. 
+func isPrivateOrLocalIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + + // Check RFC1918 private networks + privateNetworks := []string{ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + } + + for _, cidr := range privateNetworks { + _, network, _ := net.ParseCIDR(cidr) + if network.Contains(ip) { + return true + } + } + + return false +} + +func (m *dnsResolvedOnlyMatcher) Match(_ stack.Hook, pkt *stack.PacketBuffer, _, _ string) (matches, hotdrop bool) { + netProto := pkt.NetworkProtocolNumber + var dstIP net.IP + + switch netProto { + case header.IPv4ProtocolNumber: + ipv4 := header.IPv4(pkt.NetworkHeader().Slice()) + if len(ipv4) < header.IPv4MinimumSize { + return false, true // malformed, hotdrop + } + dstIP = net.IP(ipv4.DestinationAddressSlice()) + case header.IPv6ProtocolNumber: + ipv6 := header.IPv6(pkt.NetworkHeader().Slice()) + if len(ipv6) < header.IPv6MinimumSize { + return false, true // malformed, hotdrop + } + dstIP = net.IP(ipv6.DestinationAddressSlice()) + default: + return false, false + } + + // Skip checking for private IPs and localhost (these don't need DNS) + if isPrivateOrLocalIP(dstIP) { + return false, false + } + + // Check if this IP was seen in any recent DNS response + domains := m.tracker.GetDomainsForIP(dstIP) + + // If no domains found, this IP wasn't learned from DNS - block it + if len(domains) == 0 { + logrus.Infof("[egress-filter] Blocked direct IP access to: %s (not resolved via DNS)", dstIP) + return true, false // Match and drop + } + + // IP was learned from DNS - allow (don't match) + return false, false +} + +// parsePortRange parses a port string like "443" or "8000-9000". 
+func parsePortRange(portStr string) (start, end uint16, err error) { + if strings.Contains(portStr, "-") { + parts := strings.Split(portStr, "-") + if len(parts) != 2 { + return 0, 0, fmt.Errorf("invalid port range format: %s", portStr) + } + startInt, err := strconv.Atoi(parts[0]) + if err != nil { + return 0, 0, fmt.Errorf("invalid start port: %w", err) + } + if startInt < 1 || startInt > 65535 { + return 0, 0, fmt.Errorf("start port %d out of range (1-65535)", startInt) + } + endInt, err := strconv.Atoi(parts[1]) + if err != nil { + return 0, 0, fmt.Errorf("invalid end port: %w", err) + } + if endInt < 1 || endInt > 65535 { + return 0, 0, fmt.Errorf("end port %d out of range (1-65535)", endInt) + } + if startInt > endInt { + return 0, 0, fmt.Errorf("start port %d greater than end port %d", startInt, endInt) + } + return uint16(startInt), uint16(endInt), nil + } + + port, err := strconv.Atoi(portStr) + if err != nil { + return 0, 0, fmt.Errorf("invalid port: %w", err) + } + if port < 1 || port > 65535 { + return 0, 0, fmt.Errorf("port %d out of range (1-65535)", port) + } + return uint16(port), uint16(port), nil +} diff --git a/pkg/networks/usernet/gvproxy.go b/pkg/networks/usernet/gvproxy.go index 89b05286013..b2b7861941e 100644 --- a/pkg/networks/usernet/gvproxy.go +++ b/pkg/networks/usernet/gvproxy.go @@ -22,6 +22,8 @@ import ( "github.com/containers/gvisor-tap-vsock/pkg/virtualnetwork" "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" + + "github.com/lima-vm/lima/v2/pkg/networks/usernet/filter" ) type GVisorNetstackOpts struct { @@ -36,6 +38,7 @@ type GVisorNetstackOpts struct { Async bool DefaultLeases map[string]string + Policy *filter.Policy // Pre-parsed policy (nil = no filtering) } var opts *GVisorNetstackOpts @@ -100,6 +103,16 @@ func run(ctx context.Context, g *errgroup.Group, configuration *types.Configurat return err } + // Optionally wrap with policy filtering + if opts.Policy != nil { + logrus.Debugf("Applying policy with %d rules", 
len(opts.Policy.Rules)) + fvn, err := filter.Filter(vn, configuration, opts.Policy) + if err != nil { + return fmt.Errorf("failed to apply policy: %w", err) + } + vn = fvn.VirtualNetwork() + } + ln, err := transport.Listen(fmt.Sprintf("unix://%s", opts.Endpoint)) if err != nil { return err diff --git a/pkg/networks/usernet/recoincile.go b/pkg/networks/usernet/recoincile.go index ebc477ceaa6..a84f158b088 100644 --- a/pkg/networks/usernet/recoincile.go +++ b/pkg/networks/usernet/recoincile.go @@ -69,6 +69,11 @@ func Start(ctx context.Context, name string) error { return err } + policyPath, err := PolicyFile(name) + if err != nil { + return err + } + err = lockutil.WithDirLock(usernetDir, func() error { self, err := os.Executable() if err != nil { @@ -85,6 +90,10 @@ func Start(ctx context.Context, name string) error { if leasesString != "" { args = append(args, "--leases", leasesString) } + // Add policy path if policy.json exists + if _, err := os.Stat(policyPath); err == nil { + args = append(args, "--policy", policyPath) + } cmd := exec.CommandContext(ctx, self, args...) cmd.SysProcAttr = executil.BackgroundSysProcAttr diff --git a/website/content/en/docs/config/network/policy.md b/website/content/en/docs/config/network/policy.md new file mode 100644 index 00000000000..5846f9aff95 --- /dev/null +++ b/website/content/en/docs/config/network/policy.md @@ -0,0 +1,237 @@ +--- +title: Network Policy Filtering (user-v2) +weight: 35 +--- + +| ⚡ Requirement | Lima >= 2.1, user-v2 network mode only | +|-------------------|----------------| + +Network policy filtering for `user-v2` networks allows you to control egress (outbound) traffic using declarative policy rules. + +## Overview + +Provides: + +- **Protocol filtering** (TCP, UDP, ICMP) +- **Port-based rules** (single ports or ranges) +- **IP/CIDR-based rules** (allow/deny specific destinations) +- **Domain-based rules** (with wildcard support like `*.example.com`) + +## Configuration + +### 1. 
Create a policy file + +Policy files use YAML or JSON format. Example `~/my-policy.yaml`: + +```yaml +version: "1.0" + +rules: + # Rules are evaluated in priority order (lowest priority number first) + + # Block cloud metadata service + - name: block-cloud-metadata + action: deny + priority: 5 + egress: + ips: + - 169.254.169.254/32 + - fd00:ec2::254/128 + + # Allow HTTPS to specific domains + - name: allow-github + action: allow + priority: 10 + egress: + protocols: + - tcp + domains: + - github.com + - "*.github.com" + - "*.githubusercontent.com" + ports: + - "443" + + + # Allow ICMP (ping) + - name: allow-icmp + action: allow + priority: 25 + egress: + protocols: + - icmp + +``` + +### 2. Create network with policy + +Use `limactl network create` with the `--policy` flag: + +```bash +limactl network create mynetwork --mode user-v2 --policy ~/my-policy.yaml +``` + +This validates and saves the policy for the network. The policy will be automatically applied when instances use this network. + +### 3. 
Connect instances to the network + +{{< tabpane text=true >}} +{{% tab header="CLI" %}} +```bash +limactl start --network=lima:mynetwork +``` +{{% /tab %}} +{{% tab header="YAML" %}} +```yaml +networks: + - lima: mynetwork +``` +{{% /tab %}} +{{< /tabpane >}} + +## Policy Format + +### Policy Structure + +```yaml +version: "1.0" # Required: policy version + +rules: # Required: list of filtering rules + - name: rule-name # Required: unique rule identifier + action: allow|deny # Required: "allow" or "deny" + priority: <number> # Required: evaluation priority (lower = higher priority) + egress: # Optional: match criteria (omit to match all traffic) + protocols: # Optional: list of protocols + - tcp|udp|icmp + domains: # Optional: list of domain patterns + - example.com + - "*.example.com" # Wildcard support + ips: # Optional: list of IP addresses or CIDR blocks + - 192.168.1.0/24 + - 10.0.0.1 + ports: # Optional: list of ports or port ranges + - "443" + - "8000-9000" +``` + +### Field Descriptions + +- **version**: Policy format version (currently only `"1.0"` supported) +- **name**: Unique identifier for the rule +- **action**: `allow` (permit traffic) or `deny` (block traffic) +- **priority**: Numeric priority (rules evaluated from lowest to highest) +- **egress**: Match criteria for outbound traffic (criteria of different kinds are AND'ed together; multiple values within one list are OR'ed, and `ips` and `domains` are alternatives, so a destination matching either one satisfies the rule) + - **protocols**: TCP, UDP, or ICMP + - **domains**: Exact domains or wildcard patterns (requires DNS resolution) + - **ips**: IP addresses or CIDR notation (IPv4 and IPv6 supported) + - **ports**: Single ports (`"443"`) or ranges (`"8000-9000"`) + +### Rule Evaluation + +1. Rules are sorted by `priority` (lowest first) +2. For each packet, rules are evaluated in priority order +3. First matching rule determines the action (allow/deny) +4. If no rules match, traffic is **denied** by default +5.
Domain-based rules match based on DNS query tracking with TTL + +### Domain Matching + +Domain-based rules use DNS query tracking: + +- DNS responses are monitored and domain-to-IP mappings cached +- Wildcard patterns supported: `*.example.com` matches subdomains +- Mappings expire based on DNS TTL (with 10,000 domain limit) +- Both IPv4 (A) and IPv6 (AAAA) records are tracked + +## Examples + +### Example 1: Developer Workstation + +Allow common development traffic, block everything else: + +```yaml +version: "1.0" + +rules: + - name: allow-http-https + action: allow + priority: 10 + egress: + protocols: [tcp] + ports: ["80", "443"] + + - name: allow-ssh + action: allow + priority: 30 + egress: + protocols: [tcp] + ports: ["22"] +``` + +### Example 2: Restricted Environment + +Allow only specific services: + +```yaml +version: "1.0" + +rules: + - name: allow-package-repos + action: allow + priority: 10 + egress: + protocols: [tcp] + domains: + - "*.ubuntu.com" + - "*.debian.org" + - "*.alpinelinux.org" + ports: ["80", "443"] + +``` + +### Example 3: Security-Focused + +Block metadata services and limit egress: + +```yaml +version: "1.0" + +rules: + - name: block-metadata + action: deny + priority: 1 + egress: + ips: + - 169.254.169.254/32 # AWS/GCP/Azure metadata + - fd00:ec2::254/128 # AWS IPv6 metadata + + - name: allow-https-only + action: allow + priority: 20 + egress: + protocols: [tcp] + ports: ["443"] +``` + +## Troubleshooting + +### Traffic being blocked unexpectedly + +1. Check rule priority - ensure allow rules have lower priority numbers than deny rules +2. Verify all match criteria (protocols, ports, IPs/domains) are specified correctly +3. For domain-based rules, ensure DNS is allowed (UDP port 53) +4. Remember that all egress criteria within a rule are AND'ed together + +### Domain-based rules not working + +1. Ensure DNS traffic (UDP port 53) is allowed +2. Domain matching only works after DNS resolution occurs +3. 
Check that domain patterns are correct (`*.example.com` matches subdomains only) +4. DNS cache has a 10,000 domain limit; old entries are evicted + +## Notes + +- Policy filtering adds minimal overhead (DNS tracking and iptables rule evaluation) +- IPv6 is fully supported +- Policies are immutable after network start (restart network to apply changes) +- DNS tracking uses up to 10,000 entries with automatic expiration and eviction diff --git a/website/content/en/docs/config/network/user-v2.md b/website/content/en/docs/config/network/user-v2.md index c4710fd4bf0..d2ef942f318 100644 --- a/website/content/en/docs/config/network/user-v2.md +++ b/website/content/en/docs/config/network/user-v2.md @@ -8,7 +8,9 @@ weight: 32 user-v2 network provides a user-mode networking similar to the [default user-mode network](#user-mode-network--1921685024-) and also provides support for `vm -> vm` communication. -To enable this network mode, define a network with `mode: user-v2` in networks.yaml +This network mode also supports [network policy filtering]({{< ref "/docs/config/network/policy" >}}) for egress traffic control. + +To enable this network mode, define a network with `mode: user-v2` in networks.yaml or create one using `limactl network create` By default, the below network configuration is already applied (Since v0.18). @@ -40,6 +42,21 @@ networks: An instance's IP address is resolvable from another instance as `lima-.internal.` (e.g., `lima-default.internal.`). +## Creating Networks + +You can create custom user-v2 networks using the CLI: + +```bash +# Basic network +limactl network create mynetwork --mode user-v2 --gateway 192.168.42.1/24 + +# Network with policy filtering +limactl network create secure-net --mode user-v2 --gateway 192.168.43.1/24 --policy ~/my-policy.yaml +``` + +The `--policy` flag allows you to specify a YAML or JSON policy file for [egress traffic filtering]({{< ref "/docs/config/network/policy" >}}). 
+ _Note_ - Enabling this network will disable the [default user-mode network]({{< ref "/docs/config/network/user" >}}) +- Policy filtering is only available for user-v2 networks