From 769faa0f5e3d34a2c9c17baa2748b87de9b7a3c7 Mon Sep 17 00:00:00 2001 From: Elizabeth Worstell Date: Mon, 16 Mar 2026 17:33:36 -0700 Subject: [PATCH] fix: serve cached snapshots immediately, regenerate in background The snapshot request path was fetching from upstream synchronously, then invalidating the cached snapshot because its Last-Modified predated the fetch that just ran. On active repos this created a regeneration loop where every request rebuilt the snapshot. Instead, always serve a cached snapshot if one exists and move the upstream fetch to a background goroutine. The workstation's git-fetch after extracting the snapshot goes through cachew's git http-backend path, which forwards to upstream when refs are stale, so the workstation always ends up fully up to date. Also reduce the default mirror-snapshot-interval from 6h to 2h so that new pods restore a fresher mirror snapshot and the post-restore delta fetch is smaller. Amp-Thread-ID: https://ampcode.com/threads/T-019cf8fc-dbea-761b-8933-a781adb473bb Co-authored-by: Amp --- internal/strategy/git/git.go | 2 +- internal/strategy/git/snapshot.go | 35 +++++++++++++++---------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index f719bb3..b71e415 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -35,7 +35,7 @@ func Register(r *strategy.Registry, scheduler jobscheduler.Provider, cloneManage type Config struct { SnapshotInterval time.Duration `hcl:"snapshot-interval,optional" help:"How often to generate tar.zstd workstation snapshots. 0 disables snapshots." default:"0"` - MirrorSnapshotInterval time.Duration `hcl:"mirror-snapshot-interval,optional" help:"How often to generate mirror snapshots for pod bootstrap. 0 uses snapshot-interval. Defaults to 6h." default:"6h"` + MirrorSnapshotInterval time.Duration `hcl:"mirror-snapshot-interval,optional" help:"How often to generate mirror snapshots for pod bootstrap. 0 uses snapshot-interval. Defaults to 2h." default:"2h"` RepackInterval time.Duration `hcl:"repack-interval,optional" help:"How often to run full repack. 0 disables." default:"0"` // ServerURL is embedded as remote.origin.url in snapshots so git pull goes through cachew. ServerURL string `hcl:"server-url,optional" help:"Base URL of this cachew instance, embedded in snapshot remote URLs." default:"${CACHEW_URL}"` diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index 45079e8..4690ac0 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -10,7 +10,6 @@ import ( "path/filepath" "strings" "sync" - "time" "github.com/alecthomas/errors" @@ -190,15 +189,21 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request, http.Error(w, "Repository unavailable", http.StatusServiceUnavailable) return } + // Fetch in the background to keep the mirror fresh for subsequent + // git-fetch/git-pull operations through cachew, but don't block + // snapshot serving on it. refsStale, err := s.checkRefsStale(ctx, repo) if err != nil { logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", upstreamURL, "error", err) } if refsStale { - logger.InfoContext(ctx, "Refs stale for snapshot request, fetching", "upstream", upstreamURL) - if err := repo.Fetch(ctx); err != nil { - logger.WarnContext(ctx, "Fetch for snapshot failed", "upstream", upstreamURL, "error", err) - } + logger.InfoContext(ctx, "Refs stale for snapshot request, fetching in background", "upstream", upstreamURL) + go func() { + bgCtx := context.WithoutCancel(ctx) + if err := repo.Fetch(bgCtx); err != nil { + logger.WarnContext(bgCtx, "Background fetch for snapshot failed", "upstream", upstreamURL, "error", err) + } + }() } cacheKey := snapshotCacheKey(upstreamURL) @@ -210,20 +215,14 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request, return } - // Only serve the cached snapshot if it was generated after the mirror's - // last successful fetch. Otherwise regenerate from the fresh mirror. + // Always serve a cached snapshot if one exists. The workstation will + // git-fetch through cachew after extracting the snapshot, which picks + // up any commits that arrived since the snapshot was built. Regeneration + // happens in the background via the periodic snapshot job and the + // background upload in writeSnapshotSpool, keeping the cached snapshot + // reasonably fresh without blocking requests. if reader != nil { - stale := true - if lastMod := headers.Get("Last-Modified"); lastMod != "" { - if t, parseErr := time.Parse(http.TimeFormat, lastMod); parseErr == nil { - stale = repo.LastFetch().After(t) - } - } - if stale { - logger.InfoContext(ctx, "Cached snapshot predates last fetch, regenerating", "upstream", upstreamURL) - _ = reader.Close() - reader = nil - } + logger.DebugContext(ctx, "Serving cached snapshot", "upstream", upstreamURL) } if reader == nil {