diff --git a/.github/workflows/preview-build.yml b/.github/workflows/preview-build.yml new file mode 100644 index 0000000..c009e39 --- /dev/null +++ b/.github/workflows/preview-build.yml @@ -0,0 +1,81 @@ +name: 🔬 Preview build + +# Manual-dispatch image build for testing branches before they land on dev. +# Triggering on `fix/abort-kills-merge-and-cleanup-race` publishes +# `ghcr.io/mpecan/gha-cache-server:preview-<short-sha>` so a downstream +# deployment can pin it by digest. No lint/test gate — this is explicitly +# for in-cluster validation of in-flight patches; the full CI still runs on +# push and on the eventual PR. + +on: + workflow_dispatch: + inputs: + tag-suffix: + description: "Optional extra tag suffix (appended after the short sha)" + required: false + default: "" + +concurrency: + group: preview-build-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + name: 🚀 Build and push preview + permissions: + packages: write + contents: read + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Compute tags + id: tags + run: | + SHORT_SHA=$(git rev-parse --short HEAD) + BRANCH_SLUG=$(echo "${{ github.ref_name }}" | tr '/' '-') + SUFFIX="${{ inputs.tag-suffix }}" + TAGS="ghcr.io/${{ github.repository }}:preview-${SHORT_SHA}" + TAGS="${TAGS} + ghcr.io/${{ github.repository }}:preview-${BRANCH_SLUG}" + if [ -n "${SUFFIX}" ]; then + TAGS="${TAGS} + ghcr.io/${{ github.repository }}:preview-${SHORT_SHA}-${SUFFIX}" + fi + { + echo "tags<<EOF" + echo "${TAGS}" + echo "EOF" + echo "short_sha=${SHORT_SHA}" + } >> "$GITHUB_OUTPUT" + + - name: Login to GitHub Container Registry + uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + - name: Build and push + id: build + uses: docker/build-push-action@v7 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.tags.outputs.tags }} + build-args: | + BUILD_HASH=${{ steps.tags.outputs.short_sha }} + + - name: Print digest + run: | + echo "Image digest: ${{ steps.build.outputs.digest }}" + echo + echo "Pin in a deployment with:" + echo " image: ghcr.io/${{ github.repository }}@${{ steps.build.outputs.digest }}" diff --git a/lib/storage.ts b/lib/storage.ts index baea1d7..d64bd33 100644 --- a/lib/storage.ts +++ b/lib/storage.ts @@ -17,6 +17,7 @@ import { DeleteObjectsCommand, GetObjectCommand, HeadBucketCommand, + HeadObjectCommand, ListObjectsV2Command, S3Client, } from '@aws-sdk/client-s3' @@ -465,32 +466,59 @@ export class Storage { } async getCacheEntryWithDownloadUrl(args: Parameters<typeof this.matchCacheEntry>[0]) { - const cacheEntry = await this.matchCacheEntry(args) - if (!cacheEntry) return + // Retry matching: if the best match points at missing storage (e.g. the + // backend was wiped, or an upload never finalized), purge that entry and + // fall back to the next candidate. Without this, BuildKit gets a download + // URL that 404s mid-build instead of a clean cache miss. 
+ for (let attempt = 0; attempt < 10; attempt++) { + const cacheEntry = await this.matchCacheEntry(args) + if (!cacheEntry) return + + const location = await this.db + .selectFrom('storage_locations') + .where('id', '=', cacheEntry.match.locationId) + .select(['folderName', 'mergedAt', 'partCount', 'partsDeletedAt']) + .executeTakeFirst() + + if (!location || !(await this.storageHasData(location))) { + logger.warn( + `Cache entry ${cacheEntry.match.id} (${cacheEntry.match.key}) points at missing storage, purging.`, + ) + await this.db.deleteFrom('cache_entries').where('id', '=', cacheEntry.match.id).execute() + continue + } + + const defaultUrl = `${env.API_BASE_URL}/download/${cacheEntry.match.id}` + + if (!env.ENABLE_DIRECT_DOWNLOADS || !this.adapter.createDownloadUrl) + return { + downloadUrl: defaultUrl, + cacheEntry: cacheEntry.match, + } - const defaultUrl = `${env.API_BASE_URL}/download/${cacheEntry.match.id}` + const downloadUrl = location.mergedAt + ? await this.adapter.createDownloadUrl(`${location.folderName}/merged`) + : defaultUrl - if (!env.ENABLE_DIRECT_DOWNLOADS || !this.adapter.createDownloadUrl) return { - downloadUrl: defaultUrl, + downloadUrl, cacheEntry: cacheEntry.match, } + } + } - const location = await this.db - .selectFrom('storage_locations') - .where('id', '=', cacheEntry.match.locationId) - .select(['folderName', 'mergedAt']) - .executeTakeFirst() - if (!location) throw new Error('Storage location not found') + private async storageHasData(location: { + folderName: string + mergedAt: number | null + partCount: number + partsDeletedAt: number | null + }) { + if (location.mergedAt) return this.adapter.objectExists(`${location.folderName}/merged`) - const downloadUrl = location.mergedAt - ? 
await this.adapter.createDownloadUrl(`${location.folderName}/merged`) - : defaultUrl + if (location.partsDeletedAt) return false - return { - downloadUrl, - cacheEntry: cacheEntry.match, - } + const actualPartCount = await this.adapter.countFilesInFolder(`${location.folderName}/parts`) + return actualPartCount >= location.partCount } } @@ -501,6 +529,7 @@ interface StorageAdapter { uploadStream(objectName: string, stream: Readable): Promise<void> deleteFolder(folderName: string): Promise<void> countFilesInFolder(folderName: string): Promise<number> + objectExists(objectName: string): Promise<boolean> createDownloadUrl?(objectName: string): Promise<string> clear(): Promise<void> } @@ -627,6 +656,26 @@ class S3Adapter implements StorageAdapter { return listResponse.KeyCount ?? 0 } + async objectExists(objectName: string) { + try { + await this.s3.send( + new HeadObjectCommand({ + Bucket: this.bucket, + Key: `${this.keyPrefix}/${objectName}`, + }), + ) + return true + } catch (err: any) { + if ( + err.name === 'NotFound' || + err.name === 'NoSuchKey' || + err.$metadata?.httpStatusCode === 404 + ) + return false + throw err + } + } + async createDownloadUrl(objectName: string) { return getSignedUrl( this.s3, @@ -710,6 +759,15 @@ class FileSystemAdapter implements StorageAdapter { throw err } } + + async objectExists(objectName: string) { + try { + await fs.access(this.safePath(objectName)) + return true + } catch { + return false + } + } } class GcsAdapter implements StorageAdapter { @@ -777,6 +835,11 @@ class GcsAdapter implements StorageAdapter { .then((res) => res[0].length) } + async objectExists(objectName: string) { + const [exists] = await this.bucket.file(`${this.keyPrefix}/${objectName}`).exists() + return exists + } + async createDownloadUrl(objectName: string) { return this.bucket .file(`${this.keyPrefix}/${objectName}`) diff --git a/tests/stale-cache.test.ts b/tests/stale-cache.test.ts index 8c6e813..3da6762 100644 --- a/tests/stale-cache.test.ts +++ b/tests/stale-cache.test.ts @@ -73,4 +73,37 @@ 
describe('stale cache entry handling (missing storage objects)', () => { expect(missKey2).toBeUndefined() }, ) + + test( + 'falls back to a valid restore key when the best match has missing storage', + { timeout: 30_000 }, + async () => { + // Seed an entry we will later wipe from storage (keeping the DB row). + const staleContents = crypto.randomBytes(1024) + await fs.writeFile(testFilePath, staleContents) + await saveCache([testFilePath], 'restore-fallback-stale') + await fs.rm(testFilePath) + + // Let any background merge settle before wiping storage. + await new Promise((resolve) => setTimeout(resolve, 2000)) + await adapter.clear() + + // Seed a second entry whose storage is intact. Because this row is the + // most recently updated, matchCacheEntry prefers 'restore-fallback-stale' + // (our first restore key) only if we put it first; to force the stale + // row to win we list it ahead of the valid one in restoreKeys. + const validContents = crypto.randomBytes(1024) + await fs.writeFile(testFilePath, validContents) + await saveCache([testFilePath], 'restore-fallback-valid') + await fs.rm(testFilePath) + + // Primary miss, restore keys: stale first (DB row but no storage), then valid. + // The fix must purge the stale row and fall through to the valid one. + const hitKey = await restoreCache([testFilePath], 'restore-fallback-missing-primary', [ + 'restore-fallback-stale', + 'restore-fallback-valid', + ]) + expect(hitKey).toBe('restore-fallback-valid') + }, + ) })