From 278a54e4417e5af76e3dba2f53f093b7f8d089cb Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Tue, 24 Mar 2026 11:04:13 +0000 Subject: [PATCH 1/2] Add cache management API: List, GC, Purge Add CacheEntry type and three new methods to image.Cache: - List() returns metadata for all cached rootfs entries including digest, size, last-used time, and which image refs point to each. - GC() removes rootfs entries not referenced by any ref index entry (reachability-based, unlike the time-based Evict). - Purge() removes the entire cache directory. Extend the ref index format to store the original image reference alongside the digest (imageRef\tdigest), enabling List to report human-readable ref names. getRef handles both legacy and extended formats for backward compatibility. Add LayerCache.Size() for reporting total layer cache disk usage. Ref: stacklok/brood-box#84 Co-Authored-By: Claude Opus 4.6 (1M context) --- image/cache.go | 231 ++++++++++++++++++++++++++++- image/cache_test.go | 296 ++++++++++++++++++++++++++++++++++++++ image/layer_cache.go | 31 ++++ image/layer_cache_test.go | 51 +++++++ 4 files changed, 605 insertions(+), 4 deletions(-) diff --git a/image/cache.go b/image/cache.go index f45e45e..9f12068 100644 --- a/image/cache.go +++ b/image/cache.go @@ -8,6 +8,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io/fs" "os" "path/filepath" "strings" @@ -145,6 +146,224 @@ func (c *Cache) Evict(maxAge time.Duration) (int, error) { return removed, nil } +// CacheEntry holds metadata about a single cached rootfs entry. +type CacheEntry struct { + // Digest is the OCI manifest digest (e.g. "sha256:abc123..."). + Digest string + // Path is the absolute filesystem path to the extracted rootfs. + Path string + // Size is the total size in bytes of all files in the rootfs directory. + Size int64 + // ModTime is the last time this entry was accessed (touched by Get). + ModTime time.Time + // Refs lists image references (e.g. "ghcr.io/org/image:latest") that + // point to this digest via the ref index. Empty for orphaned entries. + Refs []string +} + +// List returns metadata for all cached rootfs entries along with the image +// references that point to each digest. Orphaned entries (no refs) will +// have an empty Refs slice. +func (c *Cache) List() ([]CacheEntry, error) { + if c == nil { + return nil, nil + } + + entries, err := os.ReadDir(c.baseDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read cache dir: %w", err) + } + + // Build reverse map: digest → []imageRef from the ref index. + refMap := c.buildRefMap() + + var result []CacheEntry + for _, entry := range entries { + name := entry.Name() + + // Only consider rootfs entries (sha256-*), skip refs/, layers/, tmp-*. + if !isRootfsEntry(name) { + continue + } + + info, err := entry.Info() + if err != nil { + continue + } + + digest := dirNameToDigest(name) + entryPath := filepath.Join(c.baseDir, name) + size := dirSize(entryPath) + + result = append(result, CacheEntry{ + Digest: digest, + Path: entryPath, + Size: size, + ModTime: info.ModTime(), + Refs: refMap[digest], + }) + } + + return result, nil +} + +// GC removes rootfs entries not referenced by any ref index entry. +// Unlike [Evict] (which is time-based), GC is reachability-based: an entry +// survives if and only if at least one ref points to its digest. +// Returns the number of entries removed. +func (c *Cache) GC() (int, error) { + if c == nil { + return 0, nil + } + + entries, err := os.ReadDir(c.baseDir) + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + return 0, fmt.Errorf("read cache dir: %w", err) + } + + live := c.liveDigests() + removed := 0 + + for _, entry := range entries { + name := entry.Name() + if !isRootfsEntry(name) { + continue + } + + digest := dirNameToDigest(name) + if live[digest] { + continue + } + + entryPath := filepath.Join(c.baseDir, name) + if err := os.RemoveAll(entryPath); err != nil { + continue + } + removed++ + } + + return removed, nil +} + +// Purge removes the entire cache directory including all rootfs entries, +// the ref index, and the layer cache. +func (c *Cache) Purge() error { + if c == nil { + return nil + } + if err := os.RemoveAll(c.baseDir); err != nil { + return fmt.Errorf("remove cache dir: %w", err) + } + return nil +} + +// liveDigests returns the set of digests referenced by at least one ref +// index entry. +func (c *Cache) liveDigests() map[string]bool { + refsDir := filepath.Join(c.baseDir, refDir) + entries, err := os.ReadDir(refsDir) + if err != nil { + return nil + } + + live := make(map[string]bool, len(entries)) + for _, entry := range entries { + if entry.IsDir() { + continue + } + data, err := os.ReadFile(filepath.Join(refsDir, entry.Name())) + if err != nil { + continue + } + _, digest := parseRefFile(data) + if digest != "" { + live[digest] = true + } + } + return live +} + +// buildRefMap returns a map from digest to the list of image references +// that point to it. +func (c *Cache) buildRefMap() map[string][]string { + refsDir := filepath.Join(c.baseDir, refDir) + entries, err := os.ReadDir(refsDir) + if err != nil { + return nil + } + + refMap := make(map[string][]string) + for _, entry := range entries { + if entry.IsDir() { + continue + } + data, err := os.ReadFile(filepath.Join(refsDir, entry.Name())) + if err != nil { + continue + } + imageRef, digest := parseRefFile(data) + if digest != "" { + refMap[digest] = append(refMap[digest], imageRef) + } + } + return refMap +} + +// parseRefFile parses the content of a ref index file. The file may contain +// either the legacy format (digest only) or the extended format +// (imageRef\tdigest). Returns the image reference (empty for legacy format) +// and the digest. +func parseRefFile(data []byte) (imageRef, digest string) { + content := strings.TrimSpace(string(data)) + if content == "" { + return "", "" + } + if idx := strings.IndexByte(content, '\t'); idx >= 0 { + return content[:idx], content[idx+1:] + } + // Legacy format: digest only. + return "", content +} + +// isRootfsEntry returns true if the directory name looks like a cached +// rootfs entry (starts with "sha256-") rather than a special directory. +func isRootfsEntry(name string) bool { + return strings.HasPrefix(name, "sha256-") +} + +// dirNameToDigest converts a filesystem-safe directory name back to a digest. +// "sha256-abc123" → "sha256:abc123". +func dirNameToDigest(name string) string { + return strings.Replace(name, "-", ":", 1) +} + +// dirSize walks a directory tree and returns the total size of all regular +// files. Errors are silently ignored; the returned size is best-effort. +func dirSize(path string) int64 { + var total int64 + _ = filepath.WalkDir(path, func(_ string, d fs.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + return nil + } + info, err := d.Info() + if err != nil { + return nil + } + total += info.Size() + return nil + }) + return total +} + // pathFor converts a digest like "sha256:abc123..." into a filesystem path // inside the cache directory. The colon is replaced to avoid filesystem issues. func (c *Cache) pathFor(digest string) string { @@ -201,28 +420,32 @@ func (c *Cache) StoreRef(imageRef, digest string, cfg *OCIConfig) { c.putConfig(digest, cfg) } -// getRef returns the cached digest for an image reference. +// getRef returns the cached digest for an image reference. It handles +// both the legacy format (digest only) and the extended format +// (imageRef\tdigest). func (c *Cache) getRef(imageRef string) (string, bool) { p := c.refPath(imageRef) data, err := os.ReadFile(p) if err != nil { return "", false } - digest := strings.TrimSpace(string(data)) + _, digest := parseRefFile(data) if digest == "" { return "", false } return digest, true } -// putRef stores the ref→digest mapping as a small file. +// putRef stores the ref→digest mapping as a small file. The file uses the +// extended format "imageRef\tdigest\n" so that List/GC can recover the +// original image reference from the hashed filename. func (c *Cache) putRef(imageRef, digest string) { dir := filepath.Join(c.baseDir, refDir) if err := os.MkdirAll(dir, 0o700); err != nil { return } p := c.refPath(imageRef) - _ = os.WriteFile(p, []byte(digest+"\n"), 0o600) + _ = os.WriteFile(p, []byte(imageRef+"\t"+digest+"\n"), 0o600) } // refPath returns the filesystem path for a ref index entry. The image diff --git a/image/cache_test.go b/image/cache_test.go index 7f9f7e9..a742f0d 100644 --- a/image/cache_test.go +++ b/image/cache_test.go @@ -4,6 +4,8 @@ package image import ( + "crypto/sha256" + "encoding/hex" "os" "path/filepath" "testing" @@ -248,3 +250,297 @@ func TestCache_Get_TouchesMtime(t *testing.T) { assert.True(t, info.ModTime().After(before) || info.ModTime().Equal(before), "Get should update mtime to prevent eviction") } + +// --- List tests --- + +func TestCache_List_Empty(t *testing.T) { + t.Parallel() + + c := NewCache(t.TempDir()) + + entries, err := c.List() + require.NoError(t, err) + assert.Empty(t, entries) +} + +func TestCache_List_NilCache(t *testing.T) { + t.Parallel() + + var c *Cache + entries, err := c.List() + require.NoError(t, err) + assert.Nil(t, entries) +} + +func TestCache_List_WithEntries(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create two rootfs entries with files. + for _, name := range []string{"sha256-aaa111", "sha256-bbb222"} { + dir := filepath.Join(cacheDir, name) + require.NoError(t, os.MkdirAll(dir, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), []byte("hello"), 0o644)) + } + + // Create a ref pointing to one of them (extended format). + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + refContent := "ghcr.io/org/image:latest\tsha256:aaa111\n" + refHash := sha256.Sum256([]byte("ghcr.io/org/image:latest")) + refFile := filepath.Join(refsDir, hex.EncodeToString(refHash[:])) + require.NoError(t, os.WriteFile(refFile, []byte(refContent), 0o600)) + + entries, err := c.List() + require.NoError(t, err) + require.Len(t, entries, 2) + + // Find the entry with refs. + var withRefs, orphan CacheEntry + for _, e := range entries { + if len(e.Refs) > 0 { + withRefs = e + } else { + orphan = e + } + } + + assert.Equal(t, "sha256:aaa111", withRefs.Digest) + assert.Equal(t, []string{"ghcr.io/org/image:latest"}, withRefs.Refs) + assert.Greater(t, withRefs.Size, int64(0)) + + assert.Equal(t, "sha256:bbb222", orphan.Digest) + assert.Empty(t, orphan.Refs) +} + +func TestCache_List_SkipsNonRootfs(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create entries that should be skipped. + for _, name := range []string{"refs", "layers", "tmp-rootfs-abc"} { + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, name), 0o700)) + } + + // Create one real rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-real"), 0o700)) + + entries, err := c.List() + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "sha256:real", entries[0].Digest) +} + +func TestCache_List_LegacyRefFormat(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create a rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-legacy"), 0o700)) + + // Create a ref in legacy format (digest only, no imageRef). + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + refHash := sha256.Sum256([]byte("some-image:latest")) + refFile := filepath.Join(refsDir, hex.EncodeToString(refHash[:])) + require.NoError(t, os.WriteFile(refFile, []byte("sha256:legacy\n"), 0o600)) + + entries, err := c.List() + require.NoError(t, err) + require.Len(t, entries, 1) + + // Legacy format: ref is resolved but imageRef is empty string. + assert.Equal(t, "sha256:legacy", entries[0].Digest) + assert.Equal(t, []string{""}, entries[0].Refs) +} + +// --- GC tests --- + +func TestCache_GC_NilCache(t *testing.T) { + t.Parallel() + + var c *Cache + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) +} + +func TestCache_GC_RemovesOrphans(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create two rootfs entries. + liveDir := filepath.Join(cacheDir, "sha256-live") + orphanDir := filepath.Join(cacheDir, "sha256-orphan") + require.NoError(t, os.MkdirAll(liveDir, 0o700)) + require.NoError(t, os.MkdirAll(orphanDir, 0o700)) + + // Create a ref pointing only to the live entry. + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + refHash := sha256.Sum256([]byte("my-image:latest")) + refFile := filepath.Join(refsDir, hex.EncodeToString(refHash[:])) + require.NoError(t, os.WriteFile(refFile, []byte("my-image:latest\tsha256:live\n"), 0o600)) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 1, removed) + + // Live entry should remain. + _, err = os.Stat(liveDir) + assert.NoError(t, err) + + // Orphan should be gone. + _, err = os.Stat(orphanDir) + assert.True(t, os.IsNotExist(err)) +} + +func TestCache_GC_PreservesNonRootfs(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create non-rootfs entries that GC should not touch. + refsDir := filepath.Join(cacheDir, "refs") + layersDir := filepath.Join(cacheDir, "layers") + tmpDir := filepath.Join(cacheDir, "tmp-rootfs-inflight") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + require.NoError(t, os.MkdirAll(layersDir, 0o700)) + require.NoError(t, os.MkdirAll(tmpDir, 0o700)) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) + + // All should still exist. + for _, d := range []string{refsDir, layersDir, tmpDir} { + _, err := os.Stat(d) + assert.NoError(t, err, "should not remove %s", d) + } +} + +func TestCache_GC_NoRefs(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create rootfs entries with no refs at all. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-a"), 0o700)) + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-b"), 0o700)) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 2, removed) +} + +// --- Purge tests --- + +func TestCache_Purge_NilCache(t *testing.T) { + t.Parallel() + + var c *Cache + err := c.Purge() + require.NoError(t, err) +} + +func TestCache_Purge_RemovesEverything(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Populate cache. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-entry"), 0o700)) + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "refs"), 0o700)) + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "layers", "sha256-layer"), 0o700)) + + err := c.Purge() + require.NoError(t, err) + + _, err = os.Stat(cacheDir) + assert.True(t, os.IsNotExist(err)) +} + +func TestCache_Purge_NonExistentDir(t *testing.T) { + t.Parallel() + + c := NewCache(filepath.Join(t.TempDir(), "does-not-exist")) + err := c.Purge() + require.NoError(t, err) +} + +// --- Ref format tests --- + +func TestParseRefFile_ExtendedFormat(t *testing.T) { + t.Parallel() + + data := []byte("ghcr.io/org/image:latest\tsha256:abc123\n") + imageRef, digest := parseRefFile(data) + assert.Equal(t, "ghcr.io/org/image:latest", imageRef) + assert.Equal(t, "sha256:abc123", digest) +} + +func TestParseRefFile_LegacyFormat(t *testing.T) { + t.Parallel() + + data := []byte("sha256:abc123\n") + imageRef, digest := parseRefFile(data) + assert.Equal(t, "", imageRef) + assert.Equal(t, "sha256:abc123", digest) +} + +func TestParseRefFile_Empty(t *testing.T) { + t.Parallel() + + imageRef, digest := parseRefFile([]byte("")) + assert.Equal(t, "", imageRef) + assert.Equal(t, "", digest) +} + +func TestPutRef_ExtendedFormat(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + c.putRef("ghcr.io/org/image:v1", "sha256:deadbeef") + + // Read the file back and verify extended format. + p := c.refPath("ghcr.io/org/image:v1") + data, err := os.ReadFile(p) + require.NoError(t, err) + assert.Equal(t, "ghcr.io/org/image:v1\tsha256:deadbeef\n", string(data)) + + // getRef should still work. + digest, ok := c.getRef("ghcr.io/org/image:v1") + assert.True(t, ok) + assert.Equal(t, "sha256:deadbeef", digest) +} + +func TestGetRef_BackwardCompatible(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Write a legacy-format ref file manually. + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + p := c.refPath("old-image:latest") + require.NoError(t, os.WriteFile(p, []byte("sha256:olddigest\n"), 0o600)) + + // getRef should parse the legacy format. + digest, ok := c.getRef("old-image:latest") + assert.True(t, ok) + assert.Equal(t, "sha256:olddigest", digest) +} diff --git a/image/layer_cache.go b/image/layer_cache.go index 20443d1..51d6484 100644 --- a/image/layer_cache.go +++ b/image/layer_cache.go @@ -5,6 +5,7 @@ package image import ( "fmt" + "io/fs" "os" "path/filepath" "time" @@ -118,6 +119,36 @@ func (lc *LayerCache) Evict(maxAge time.Duration) (int, error) { return removed, nil } +// Size returns the total size in bytes of all cached layer entries. +// Returns 0 if the layer cache directory does not exist. +func (lc *LayerCache) Size() (int64, error) { + if lc == nil { + return 0, nil + } + var total int64 + err := filepath.WalkDir(lc.baseDir, func(_ string, d fs.DirEntry, err error) error { + if err != nil { + if os.IsNotExist(err) { + return fs.SkipAll + } + return nil + } + if d.IsDir() { + return nil + } + info, err := d.Info() + if err != nil { + return nil + } + total += info.Size() + return nil + }) + if err != nil { + return total, fmt.Errorf("walk layer cache: %w", err) + } + return total, nil +} + // pathFor converts a DiffID into a filesystem path inside the cache directory. func (lc *LayerCache) pathFor(diffID v1.Hash) string { safe := diffID.Algorithm + "-" + diffID.Hex diff --git a/image/layer_cache_test.go b/image/layer_cache_test.go index eb3ead3..06c8600 100644 --- a/image/layer_cache_test.go +++ b/image/layer_cache_test.go @@ -195,3 +195,54 @@ func TestLayerCache_Evict_NonExistentDir(t *testing.T) { require.NoError(t, err) assert.Equal(t, 0, removed) } + +// --- Size tests --- + +func TestLayerCache_Size_Empty(t *testing.T) { + t.Parallel() + + lc := NewLayerCache(t.TempDir()) + + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(0), size) +} + +func TestLayerCache_Size_Nil(t *testing.T) { + t.Parallel() + + var lc *LayerCache + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(0), size) +} + +func TestLayerCache_Size_NonExistentDir(t *testing.T) { + t.Parallel() + + lc := NewLayerCache(filepath.Join(t.TempDir(), "does-not-exist")) + + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(0), size) +} + +func TestLayerCache_Size_WithEntries(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + lc := NewLayerCache(cacheDir) + + // Create two layer directories with known file sizes. + layer1 := filepath.Join(cacheDir, "sha256-layer1") + require.NoError(t, os.MkdirAll(layer1, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(layer1, "data"), make([]byte, 1000), 0o644)) + + layer2 := filepath.Join(cacheDir, "sha256-layer2") + require.NoError(t, os.MkdirAll(layer2, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(layer2, "data"), make([]byte, 2000), 0o644)) + + size, err := lc.Size() + require.NoError(t, err) + assert.Equal(t, int64(3000), size) +} From 8b0e6f84297c25272d8a61c3897c6d0fc61c0267 Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Tue, 24 Mar 2026 11:08:17 +0000 Subject: [PATCH 2/2] Address review findings for cache management API - GC now aborts instead of deleting everything when the refs directory is unreadable (liveDigests returns error instead of nil map) - Filter empty imageRef from Refs in buildRefMap (legacy-format refs no longer produce empty strings in CacheEntry.Refs) - Harmonize pathFor to use Replace(..., 1) matching dirNameToDigest - Add path traversal defense in pathFor rejecting digests with / or .. - Fix ModTime doc comment to be more precise - Add tests: GC with non-existent dir, multiple refs to same digest, corrupt ref files - Tighten List size assertion to exact byte count Co-Authored-By: Claude Opus 4.6 (1M context) --- image/cache.go | 47 ++++++++++++++++++++------ image/cache_test.go | 81 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 106 insertions(+), 22 deletions(-) diff --git a/image/cache.go b/image/cache.go index 9f12068..5377486 100644 --- a/image/cache.go +++ b/image/cache.go @@ -154,7 +154,8 @@ type CacheEntry struct { Path string // Size is the total size in bytes of all files in the rootfs directory. Size int64 - // ModTime is the last time this entry was accessed (touched by Get). + // ModTime is the modification time of the cache directory entry. + // Updated by Get on cache hits; otherwise reflects creation time. ModTime time.Time // Refs lists image references (e.g. "ghcr.io/org/image:latest") that // point to this digest via the ref index. Empty for orphaned entries. @@ -214,6 +215,11 @@ func (c *Cache) List() ([]CacheEntry, error) { // Unlike [Evict] (which is time-based), GC is reachability-based: an entry // survives if and only if at least one ref points to its digest. // Returns the number of entries removed. +// +// GC is not safe for concurrent use with [Pull]. If another process is +// pulling an image while GC runs, the pulled entry may be collected before +// the ref index is updated. The consequence is a cache miss on the next +// run, not data corruption. func (c *Cache) GC() (int, error) { if c == nil { return 0, nil @@ -227,7 +233,11 @@ func (c *Cache) GC() (int, error) { return 0, fmt.Errorf("read cache dir: %w", err) } - live := c.liveDigests() + live, err := c.liveDigests() + if err != nil { + return 0, fmt.Errorf("enumerate live digests: %w", err) + } + removed := 0 for _, entry := range entries { @@ -264,12 +274,18 @@ func (c *Cache) Purge() error { } // liveDigests returns the set of digests referenced by at least one ref -// index entry. -func (c *Cache) liveDigests() map[string]bool { +// index entry. Returns a nil map and nil error when the refs directory +// does not exist (no images have been pulled yet). Returns a non-nil +// error if the refs directory exists but cannot be read, so callers +// can abort rather than treating all entries as orphaned. +func (c *Cache) liveDigests() (map[string]bool, error) { refsDir := filepath.Join(c.baseDir, refDir) entries, err := os.ReadDir(refsDir) if err != nil { - return nil + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read refs dir: %w", err) } live := make(map[string]bool, len(entries)) @@ -286,7 +302,7 @@ func (c *Cache) liveDigests() map[string]bool { live[digest] = true } } - return live + return live, nil } // buildRefMap returns a map from digest to the list of image references @@ -308,7 +324,13 @@ func (c *Cache) buildRefMap() map[string][]string { continue } imageRef, digest := parseRefFile(data) - if digest != "" { + if digest == "" { + continue + } + // Skip empty image refs from legacy-format files. The entry still + // counts as referenced for GC (via liveDigests), but we don't add + // an empty string to the Refs slice. + if imageRef != "" { refMap[digest] = append(refMap[digest], imageRef) } } @@ -365,10 +387,15 @@ func dirSize(path string) int64 { } // pathFor converts a digest like "sha256:abc123..." into a filesystem path -// inside the cache directory. The colon is replaced to avoid filesystem issues. +// inside the cache directory. Only the first colon is replaced so the +// round-trip with [dirNameToDigest] is symmetric. func (c *Cache) pathFor(digest string) string { - // Replace "sha256:" prefix with "sha256-" for filesystem safety. - safe := strings.ReplaceAll(digest, ":", "-") + if strings.ContainsAny(digest, "/\\") || strings.Contains(digest, "..") { + // Defense-in-depth: reject digests that could escape the cache dir. + // Normal OCI digests are "algorithm:hex" with no path separators. + return filepath.Join(c.baseDir, "invalid-digest") + } + safe := strings.Replace(digest, ":", "-", 1) return filepath.Join(c.baseDir, safe) } diff --git a/image/cache_test.go b/image/cache_test.go index a742f0d..d7c2498 100644 --- a/image/cache_test.go +++ b/image/cache_test.go @@ -297,21 +297,19 @@ func TestCache_List_WithEntries(t *testing.T) { require.NoError(t, err) require.Len(t, entries, 2) - // Find the entry with refs. - var withRefs, orphan CacheEntry + // Build a map for order-independent assertions. + byDigest := make(map[string]CacheEntry) for _, e := range entries { - if len(e.Refs) > 0 { - withRefs = e - } else { - orphan = e - } + byDigest[e.Digest] = e } - assert.Equal(t, "sha256:aaa111", withRefs.Digest) + withRefs, ok := byDigest["sha256:aaa111"] + require.True(t, ok, "expected entry for sha256:aaa111") assert.Equal(t, []string{"ghcr.io/org/image:latest"}, withRefs.Refs) - assert.Greater(t, withRefs.Size, int64(0)) + assert.Equal(t, int64(5), withRefs.Size) // "hello" is 5 bytes - assert.Equal(t, "sha256:bbb222", orphan.Digest) + orphan, ok := byDigest["sha256:bbb222"] + require.True(t, ok, "expected entry for sha256:bbb222") assert.Empty(t, orphan.Refs) } @@ -355,9 +353,10 @@ func TestCache_List_LegacyRefFormat(t *testing.T) { require.NoError(t, err) require.Len(t, entries, 1) - // Legacy format: ref is resolved but imageRef is empty string. + // Legacy format: entry is referenced (not orphaned) but imageRef + // is not recoverable, so Refs is empty (no empty strings). assert.Equal(t, "sha256:legacy", entries[0].Digest) - assert.Equal(t, []string{""}, entries[0].Refs) + assert.Empty(t, entries[0].Refs) } // --- GC tests --- @@ -527,6 +526,64 @@ func TestPutRef_ExtendedFormat(t *testing.T) { assert.Equal(t, "sha256:deadbeef", digest) } +func TestCache_GC_NonExistentDir(t *testing.T) { + t.Parallel() + + c := NewCache(filepath.Join(t.TempDir(), "does-not-exist")) + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) +} + +func TestCache_GC_MultipleRefsToSameDigest(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create one rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-shared"), 0o700)) + + // Create two refs pointing to the same digest. + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + for _, ref := range []string{"image:latest", "image:v1"} { + h := sha256.Sum256([]byte(ref)) + p := filepath.Join(refsDir, hex.EncodeToString(h[:])) + require.NoError(t, os.WriteFile(p, []byte(ref+"\tsha256:shared\n"), 0o600)) + } + + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 0, removed) + + // Entry should survive. + _, err = os.Stat(filepath.Join(cacheDir, "sha256-shared")) + assert.NoError(t, err) +} + +func TestCache_GC_CorruptRefFiles(t *testing.T) { + t.Parallel() + + cacheDir := t.TempDir() + c := NewCache(cacheDir) + + // Create a rootfs entry. + require.NoError(t, os.MkdirAll(filepath.Join(cacheDir, "sha256-orphan"), 0o700)) + + // Create refs dir with corrupt files (empty, whitespace-only). + refsDir := filepath.Join(cacheDir, "refs") + require.NoError(t, os.MkdirAll(refsDir, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(refsDir, "empty"), []byte(""), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(refsDir, "whitespace"), []byte(" \n"), 0o600)) + + // Corrupt refs should not protect the orphaned entry. + removed, err := c.GC() + require.NoError(t, err) + assert.Equal(t, 1, removed) +} + func TestGetRef_BackwardCompatible(t *testing.T) { t.Parallel()