diff --git a/image/cache.go b/image/cache.go index f45e45e..5377486 100644 --- a/image/cache.go +++ b/image/cache.go @@ -8,6 +8,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io/fs" "os" "path/filepath" "strings" @@ -145,11 +146,256 @@ func (c *Cache) Evict(maxAge time.Duration) (int, error) { return removed, nil } +// CacheEntry holds metadata about a single cached rootfs entry. +type CacheEntry struct { + // Digest is the OCI manifest digest (e.g. "sha256:abc123..."). + Digest string + // Path is the absolute filesystem path to the extracted rootfs. + Path string + // Size is the total size in bytes of all files in the rootfs directory. + Size int64 + // ModTime is the modification time of the cache directory entry. + // Updated by Get on cache hits; otherwise reflects creation time. + ModTime time.Time + // Refs lists image references (e.g. "ghcr.io/org/image:latest") that + // point to this digest via the ref index. Empty for orphaned entries. + Refs []string +} + +// List returns metadata for all cached rootfs entries along with the image +// references that point to each digest. Orphaned entries (no refs) will +// have an empty Refs slice. +func (c *Cache) List() ([]CacheEntry, error) { + if c == nil { + return nil, nil + } + + entries, err := os.ReadDir(c.baseDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read cache dir: %w", err) + } + + // Build reverse map: digest → []imageRef from the ref index. + refMap := c.buildRefMap() + + var result []CacheEntry + for _, entry := range entries { + name := entry.Name() + + // Only consider rootfs entries (sha256-*), skip refs/, layers/, tmp-*. + if !isRootfsEntry(name) { + continue + } + + info, err := entry.Info() + if err != nil { + continue + } + + digest := dirNameToDigest(name) + entryPath := filepath.Join(c.baseDir, name) + size := dirSize(entryPath) + + result = append(result, CacheEntry{ + Digest: digest, + Path: entryPath, + Size: size, + ModTime: info.ModTime(), + Refs: refMap[digest], + }) + } + + return result, nil +} + +// GC removes rootfs entries not referenced by any ref index entry. +// Unlike [Evict] (which is time-based), GC is reachability-based: an entry +// survives if and only if at least one ref points to its digest. +// Returns the number of entries removed. +// +// GC is not safe for concurrent use with [Pull]. If another process is +// pulling an image while GC runs, the pulled entry may be collected before +// the ref index is updated. The consequence is a cache miss on the next +// run, not data corruption. +func (c *Cache) GC() (int, error) { + if c == nil { + return 0, nil + } + + entries, err := os.ReadDir(c.baseDir) + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + return 0, fmt.Errorf("read cache dir: %w", err) + } + + live, err := c.liveDigests() + if err != nil { + return 0, fmt.Errorf("enumerate live digests: %w", err) + } + + removed := 0 + + for _, entry := range entries { + name := entry.Name() + if !isRootfsEntry(name) { + continue + } + + digest := dirNameToDigest(name) + if live[digest] { + continue + } + + entryPath := filepath.Join(c.baseDir, name) + if err := os.RemoveAll(entryPath); err != nil { + continue + } + removed++ + } + + return removed, nil +} + +// Purge removes the entire cache directory including all rootfs entries, +// the ref index, and the layer cache. +func (c *Cache) Purge() error { + if c == nil { + return nil + } + if err := os.RemoveAll(c.baseDir); err != nil { + return fmt.Errorf("remove cache dir: %w", err) + } + return nil +} + +// liveDigests returns the set of digests referenced by at least one ref +// index entry. Returns a nil map and nil error when the refs directory +// does not exist (no images have been pulled yet). Returns a non-nil +// error if the refs directory exists but cannot be read, so callers +// can abort rather than treating all entries as orphaned. +func (c *Cache) liveDigests() (map[string]bool, error) { + refsDir := filepath.Join(c.baseDir, refDir) + entries, err := os.ReadDir(refsDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read refs dir: %w", err) + } + + live := make(map[string]bool, len(entries)) + for _, entry := range entries { + if entry.IsDir() { + continue + } + data, err := os.ReadFile(filepath.Join(refsDir, entry.Name())) + if err != nil { + continue + } + _, digest := parseRefFile(data) + if digest != "" { + live[digest] = true + } + } + return live, nil +} + +// buildRefMap returns a map from digest to the list of image references +// that point to it. +func (c *Cache) buildRefMap() map[string][]string { + refsDir := filepath.Join(c.baseDir, refDir) + entries, err := os.ReadDir(refsDir) + if err != nil { + return nil + } + + refMap := make(map[string][]string) + for _, entry := range entries { + if entry.IsDir() { + continue + } + data, err := os.ReadFile(filepath.Join(refsDir, entry.Name())) + if err != nil { + continue + } + imageRef, digest := parseRefFile(data) + if digest == "" { + continue + } + // Skip empty image refs from legacy-format files. The entry still + // counts as referenced for GC (via liveDigests), but we don't add + // an empty string to the Refs slice. + if imageRef != "" { + refMap[digest] = append(refMap[digest], imageRef) + } + } + return refMap +} + +// parseRefFile parses the content of a ref index file. The file may contain +// either the legacy format (digest only) or the extended format +// (imageRef\tdigest). Returns the image reference (empty for legacy format) +// and the digest. +func parseRefFile(data []byte) (imageRef, digest string) { + content := strings.TrimSpace(string(data)) + if content == "" { + return "", "" + } + if idx := strings.IndexByte(content, '\t'); idx >= 0 { + return content[:idx], content[idx+1:] + } + // Legacy format: digest only. + return "", content +} + +// isRootfsEntry returns true if the directory name looks like a cached +// rootfs entry (starts with "sha256-") rather than a special directory. +func isRootfsEntry(name string) bool { + return strings.HasPrefix(name, "sha256-") +} + +// dirNameToDigest converts a filesystem-safe directory name back to a digest. +// "sha256-abc123" → "sha256:abc123". +func dirNameToDigest(name string) string { + return strings.Replace(name, "-", ":", 1) +} + +// dirSize walks a directory tree and returns the total size of all regular +// files. Errors are silently ignored; the returned size is best-effort. +func dirSize(path string) int64 { + var total int64 + _ = filepath.WalkDir(path, func(_ string, d fs.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + return nil + } + info, err := d.Info() + if err != nil { + return nil + } + total += info.Size() + return nil + }) + return total +} + // pathFor converts a digest like "sha256:abc123..." into a filesystem path -// inside the cache directory. The colon is replaced to avoid filesystem issues. +// inside the cache directory. Only the first colon is replaced so the +// round-trip with [dirNameToDigest] is symmetric. func (c *Cache) pathFor(digest string) string { - // Replace "sha256:" prefix with "sha256-" for filesystem safety. - safe := strings.ReplaceAll(digest, ":", "-") + if strings.ContainsAny(digest, "/\\") || strings.Contains(digest, "..") { + // Defense-in-depth: reject digests that could escape the cache dir. + // Normal OCI digests are "algorithm:hex" with no path separators. + return filepath.Join(c.baseDir, "invalid-digest") + } + safe := strings.Replace(digest, ":", "-", 1) return filepath.Join(c.baseDir, safe) } @@ -201,28 +447,32 @@ func (c *Cache) StoreRef(imageRef, digest string, cfg *OCIConfig) { c.putConfig(digest, cfg) } -// getRef returns the cached digest for an image reference. +// getRef returns the cached digest for an image reference. It handles +// both the legacy format (digest only) and the extended format +// (imageRef\tdigest). func (c *Cache) getRef(imageRef string) (string, bool) { p := c.refPath(imageRef) data, err := os.ReadFile(p) if err != nil { return "", false } - digest := strings.TrimSpace(string(data)) + _, digest := parseRefFile(data) if digest == "" { return "", false } return digest, true } -// putRef stores the ref→digest mapping as a small file. +// putRef stores the ref→digest mapping as a small file. The file uses the +// extended format "imageRef\tdigest\n" so that List/GC can recover the +// original image reference from the hashed filename. func (c *Cache) putRef(imageRef, digest string) { dir := filepath.Join(c.baseDir, refDir) if err := os.MkdirAll(dir, 0o700); err != nil { return } p := c.refPath(imageRef) - _ = os.WriteFile(p, []byte(digest+"\n"), 0o600) + _ = os.WriteFile(p, []byte(imageRef+"\t"+digest+"\n"), 0o600) } // refPath returns the filesystem path for a ref index entry. The image diff --git a/image/cache_test.go b/image/cache_test.go index 7f9f7e9..d7c2498 100644 --- a/image/cache_test.go +++ b/image/cache_test.go @@ -4,6 +4,8 @@ package image import ( + "crypto/sha256" + "encoding/hex" "os" "path/filepath" "testing" @@ -248,3 +250,354 @@ func TestCache_Get_TouchesMtime(t *testing.T) { assert.True(t, info.ModTime().After(before) || info.ModTime().Equal(before), "Get should update mtime to prevent eviction") } + +// --- List tests --- + +func TestCache_List_Empty(t *testing.T) { + t.Parallel() + + c := NewCache(t.TempDir()) + + entries, err := c.List() + require.NoError(t, err) + assert.Empty(t, entries) +} + +func TestCache_List_NilCache(t *testing.T) { + t.Parallel() + + var c *Cache + entries, err := c.List() + require.NoError(t, err) + assert.Nil(t, entries) +} + +func TestCache_List_WithEntries(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create two rootfs entries with files. + for _, name := range []string{"sha256-aaa111", "sha256-bbb222"} { + dir := filepath.Join(cacheDir, name) + require.NoError(t, os.MkdirAll(dir, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), []byte("hello"), 0o644)) + } + + // Create a ref pointing to one of them (extended format). + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + refContent := "ghcr.io/org/image:latest\tsha256:aaa111\n" + refHash := sha256.Sum256([]byte("ghcr.io/org/image:latest")) + refFile := filepath.Join(refsDir, hex.EncodeToString(refHash[:])) + require.NoError(t, os.WriteFile(refFile, []byte(refContent), 0o600)) + + entries, err := c.List() + require.NoError(t, err) + require.Len(t, entries, 2) + + // Build a map for order-independent assertions. + byDigest := make(map[string]CacheEntry) + for _, e := range entries { + byDigest[e.Digest] = e + } + + withRefs, ok := byDigest["sha256:aaa111"] + require.True(t, ok, "expected entry for sha256:aaa111") + assert.Equal(t, []string{"ghcr.io/org/image:latest"}, withRefs.Refs) + assert.Equal(t, int64(5), withRefs.Size) // "hello" is 5 bytes + + orphan, ok := byDigest["sha256:bbb222"] + require.True(t, ok, "expected entry for sha256:bbb222") + assert.Empty(t, orphan.Refs) +} + +func TestCache_List_SkipsNonRootfs(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create entries that should be skipped. + for _, name := range []string{"refs", "layers", "tmp-rootfs-abc"} { + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, name), 0o700)) + } + + // Create one real rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-real"), 0o700)) + + entries, err := c.List() + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "sha256:real", entries[0].Digest) +} + +func TestCache_List_LegacyRefFormat(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create a rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-legacy"), 0o700)) + + // Create a ref in legacy format (digest only, no imageRef). + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + refHash := sha256.Sum256([]byte("some-image:latest")) + refFile := filepath.Join(refsDir, hex.EncodeToString(refHash[:])) + require.NoError(t, os.WriteFile(refFile, []byte("sha256:legacy\n"), 0o600)) + + entries, err := c.List() + require.NoError(t, err) + require.Len(t, entries, 1) + + // Legacy format: entry is referenced (not orphaned) but imageRef + // is not recoverable, so Refs is empty (no empty strings). + assert.Equal(t, "sha256:legacy", entries[0].Digest) + assert.Empty(t, entries[0].Refs) +} + +// --- GC tests --- + +func TestCache_GC_NilCache(t *testing.T) { + t.Parallel() + + var c *Cache + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) +} + +func TestCache_GC_RemovesOrphans(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create two rootfs entries. + liveDir := filepath.Join(cacheDir, "sha256-live") + orphanDir := filepath.Join(cacheDir, "sha256-orphan") + require.NoError(t, os.MkdirAll(liveDir, 0o700)) + require.NoError(t, os.MkdirAll(orphanDir, 0o700)) + + // Create a ref pointing only to the live entry. + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + refHash := sha256.Sum256([]byte("my-image:latest")) + refFile := filepath.Join(refsDir, hex.EncodeToString(refHash[:])) + require.NoError(t, os.WriteFile(refFile, []byte("my-image:latest\tsha256:live\n"), 0o600)) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 1, removed) + + // Live entry should remain. + _, err = os.Stat(liveDir) + assert.NoError(t, err) + + // Orphan should be gone. + _, err = os.Stat(orphanDir) + assert.True(t, os.IsNotExist(err)) +} + +func TestCache_GC_PreservesNonRootfs(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create non-rootfs entries that GC should not touch. + refsDir := filepath.Join(cacheDir, "refs") + layersDir := filepath.Join(cacheDir, "layers") + tmpDir := filepath.Join(cacheDir, "tmp-rootfs-inflight") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + require.NoError(t, os.MkdirAll(layersDir, 0o700)) + require.NoError(t, os.MkdirAll(tmpDir, 0o700)) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) + + // All should still exist. + for _, d := range []string{refsDir, layersDir, tmpDir} { + _, err := os.Stat(d) + assert.NoError(t, err, "should not remove %s", d) + } +} + +func TestCache_GC_NoRefs(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create rootfs entries with no refs at all. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-a"), 0o700)) + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-b"), 0o700)) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 2, removed) +} + +// --- Purge tests --- + +func TestCache_Purge_NilCache(t *testing.T) { + t.Parallel() + + var c *Cache + err := c.Purge() + require.NoError(t, err) +} + +func TestCache_Purge_RemovesEverything(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Populate cache. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-entry"), 0o700)) + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "refs"), 0o700)) + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "layers", "sha256-layer"), 0o700)) + + err := c.Purge() + require.NoError(t, err) + + _, err = os.Stat(cacheDir) + assert.True(t, os.IsNotExist(err)) +} + +func TestCache_Purge_NonExistentDir(t *testing.T) { + t.Parallel() + + c := NewCache(filepath.Join(t.TempDir(), "does-not-exist")) + err := c.Purge() + require.NoError(t, err) +} + +// --- Ref format tests --- + +func TestParseRefFile_ExtendedFormat(t *testing.T) { + t.Parallel() + + data := []byte("ghcr.io/org/image:latest\tsha256:abc123\n") + imageRef, digest := parseRefFile(data) + assert.Equal(t, "ghcr.io/org/image:latest", imageRef) + assert.Equal(t, "sha256:abc123", digest) +} + +func TestParseRefFile_LegacyFormat(t *testing.T) { + t.Parallel() + + data := []byte("sha256:abc123\n") + imageRef, digest := parseRefFile(data) + assert.Equal(t, "", imageRef) + assert.Equal(t, "sha256:abc123", digest) +} + +func TestParseRefFile_Empty(t *testing.T) { + t.Parallel() + + imageRef, digest := parseRefFile([]byte("")) + assert.Equal(t, "", imageRef) + assert.Equal(t, "", digest) +} + +func TestPutRef_ExtendedFormat(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + c.putRef("ghcr.io/org/image:v1", "sha256:deadbeef") + + // Read the file back and verify extended format. + p := c.refPath("ghcr.io/org/image:v1") + data, err := os.ReadFile(p) + require.NoError(t, err) + assert.Equal(t, "ghcr.io/org/image:v1\tsha256:deadbeef\n", string(data)) + + // getRef should still work. + digest, ok := c.getRef("ghcr.io/org/image:v1") + assert.True(t, ok) + assert.Equal(t, "sha256:deadbeef", digest) +} + +func TestCache_GC_NonExistentDir(t *testing.T) { + t.Parallel() + + c := NewCache(filepath.Join(t.TempDir(), "does-not-exist")) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) +} + +func TestCache_GC_MultipleRefsToSameDigest(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create one rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-shared"), 0o700)) + + // Create two refs pointing to the same digest. + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + for _, ref := range []string{"image:latest", "image:v1"} { + h := sha256.Sum256([]byte(ref)) + p := filepath.Join(refsDir, hex.EncodeToString(h[:])) + require.NoError(t, os.WriteFile(p, []byte(ref+"\tsha256:shared\n"), 0o600)) + } + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) + + // Entry should survive. + _, err = os.Stat(filepath.Join(cacheDir, "sha256-shared")) + assert.NoError(t, err) +} + +func TestCache_GC_CorruptRefFiles(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create a rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-orphan"), 0o700)) + + // Create refs dir with corrupt files (empty, whitespace-only). + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(refsDir, "empty"), []byte(""), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(refsDir, "whitespace"), []byte(" \n"), 0o600)) + + // Corrupt refs should not protect the orphaned entry. + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 1, removed) +} + +func TestGetRef_BackwardCompatible(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Write a legacy-format ref file manually. + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + p := c.refPath("old-image:latest") + require.NoError(t, os.WriteFile(p, []byte("sha256:olddigest\n"), 0o600)) + + // getRef should parse the legacy format. + digest, ok := c.getRef("old-image:latest") + assert.True(t, ok) + assert.Equal(t, "sha256:olddigest", digest) +} diff --git a/image/layer_cache.go b/image/layer_cache.go index 20443d1..51d6484 100644 --- a/image/layer_cache.go +++ b/image/layer_cache.go @@ -5,6 +5,7 @@ package image import ( "fmt" + "io/fs" "os" "path/filepath" "time" @@ -118,6 +119,36 @@ func (lc *LayerCache) Evict(maxAge time.Duration) (int, error) { return removed, nil } +// Size returns the total size in bytes of all cached layer entries. +// Returns 0 if the layer cache directory does not exist. +func (lc *LayerCache) Size() (int64, error) { + if lc == nil { + return 0, nil + } + var total int64 + err := filepath.WalkDir(lc.baseDir, func(_ string, d fs.DirEntry, err error) error { + if err != nil { + if os.IsNotExist(err) { + return fs.SkipAll + } + return nil + } + if d.IsDir() { + return nil + } + info, err := d.Info() + if err != nil { + return nil + } + total += info.Size() + return nil + }) + if err != nil { + return total, fmt.Errorf("walk layer cache: %w", err) + } + return total, nil +} + // pathFor converts a DiffID into a filesystem path inside the cache directory. func (lc *LayerCache) pathFor(diffID v1.Hash) string { safe := diffID.Algorithm + "-" + diffID.Hex diff --git a/image/layer_cache_test.go b/image/layer_cache_test.go index eb3ead3..06c8600 100644 --- a/image/layer_cache_test.go +++ b/image/layer_cache_test.go @@ -195,3 +195,54 @@ func TestLayerCache_Evict_NonExistentDir(t *testing.T) { require.NoError(t, err) assert.Equal(t, 0, removed) } + +// --- Size tests --- + +func TestLayerCache_Size_Empty(t *testing.T) { + t.Parallel() + + lc := NewLayerCache(t.TempDir()) + + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(0), size) +} + +func TestLayerCache_Size_Nil(t *testing.T) { + t.Parallel() + + var lc *LayerCache + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(0), size) +} + +func TestLayerCache_Size_NonExistentDir(t *testing.T) { + t.Parallel() + + lc := NewLayerCache(filepath.Join(t.TempDir(), "does-not-exist")) + + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(0), size) +} + +func TestLayerCache_Size_WithEntries(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + lc := NewLayerCache(cacheDir) + + // Create two layer directories with known file sizes. + layer1 := filepath.Join(cacheDir, "sha256-layer1") + require.NoError(t, os.MkdirAll(layer1, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(layer1, "data"), make([]byte, 1000), 0o644)) + + layer2 := filepath.Join(cacheDir, "sha256-layer2") + require.NoError(t, os.MkdirAll(layer2, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(layer2, "data"), make([]byte, 2000), 0o644)) + + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(3000), size) +}