From 6e27b6c75cc447583c8c1e6698d808c72c4fcbef Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sat, 30 May 2026 21:16:35 +0530 Subject: [PATCH] fix(router): OptionalAuthStrict on /stacks/new + DELETE /stacks/:slug + presign MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T19 P1-7 (2026-05-20) migrated /db, /vector, /cache, /nosql, /queue, /storage, /webhook /new endpoints from bare OptionalAuth to OptionalAuthStrict so an agent presenting a malformed/expired bearer header gets a 401 instead of silently downgrading to anonymous-tier provisioning. Three sites were missed in that wave (rule 17 surface miss / MR-P1-38 follow-up): 1. POST /stacks/new — multi-service anonymous-capable provision. 2. DELETE /stacks/:slug — mutating, anonymous-capable. 3. POST /storage/:token/presign — H46 F1 (2026-05-21) landed the OptionalAuthStrict intent in the source comment but the chain itself was still bare OptionalAuth. Code now matches the comment. GET /stacks/:slug and GET /stacks/:slug/logs/:svc intentionally stay non-strict: a logged-out tab reading an anonymous slug must not 401. Coverage block (rule 17): Symptom: agent with bad bearer silently gets anonymous-tier resource + 24h TTL, no signal. Enumeration: rg -n 'middleware.OptionalAuth\(' internal/router/ router.go (only mutating, anonymous-capable routes in scope). Sites found: 3 — /stacks/new, DELETE /stacks/:slug, /storage/:token/presign. Sites touched: 3 — all three swapped to OptionalAuthStrict. Coverage test: TestRouter_AnonymousMutatingRoutes_StrictBearer in internal/router/optional_auth_strict_coverage_test.go iterates the live route registry and probes each anonymous-capable mutating endpoint with a malformed bearer. Asserts 401 (strict) and also asserts no- bearer is not-401 (still anonymous-capable). A future drop-back to bare OptionalAuth on any of the 10 registered routes fails this test loudly. Live verified: awaiting user verification on prod after deploy (curl /stacks/new with Bearer not-a-jwt → expect 401 instead of 503/4xx anonymous fallthrough). Also updated the existing TestPresign_RegistryHasMiddleware + TestPresign_TestHelpersMirrorMiddleware needles + the testhelpers mirror so the strict variant is the asserted-required wiring (the old needle matched both variants by prefix; pinning strict catches regressions explicitly). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../storage_presign_middleware_test.go | 17 +- .../optional_auth_strict_coverage_test.go | 148 ++++++++++++++++++ internal/router/router.go | 27 +++- internal/testhelpers/testhelpers.go | 8 +- 4 files changed, 191 insertions(+), 9 deletions(-) create mode 100644 internal/router/optional_auth_strict_coverage_test.go diff --git a/internal/handlers/storage_presign_middleware_test.go b/internal/handlers/storage_presign_middleware_test.go index f8814243..38752f96 100644 --- a/internal/handlers/storage_presign_middleware_test.go +++ b/internal/handlers/storage_presign_middleware_test.go @@ -99,8 +99,15 @@ func TestPresign_RegistryHasMiddleware(t *testing.T) { why string }{ { - needle: "middleware.OptionalAuth(cfg)", - why: "session JWT cross-check requires OptionalAuth to populate team_id when present", + // H46 F1 (2026-05-21) intent + the 2026-05-30 follow-up fix: + // the chain MUST use OptionalAuthStrict so a malformed/expired + // bearer 401s instead of silently downgrading to the + // anonymous-via-token path. The earlier needle here was + // OptionalAuth(cfg) (matched both variants by prefix) — + // post-2026-05-30 we pin the strict variant explicitly so a + // future drop-back to bare OptionalAuth fails this test. + needle: "middleware.OptionalAuthStrict(cfg)", + why: "session JWT cross-check (strict): present-but-bad bearer must 401, missing bearer still anonymous", }, { needle: "middleware.PresignTokenRateLimit(rdb)", @@ -151,7 +158,11 @@ func TestPresign_TestHelpersMirrorMiddleware(t *testing.T) { block := srcStr[idx:end] mustHave := []string{ - "middleware.OptionalAuth(cfg)", + // Mirror the production strict-auth wiring (see + // TestPresign_RegistryHasMiddleware). A testhelpers mirror that + // stays on bare OptionalAuth means handler tests would falsely + // pass while production rejects. + "middleware.OptionalAuthStrict(cfg)", "middleware.PresignTokenRateLimit(rdb)", `middleware.Idempotency(rdb, "storage.presign")`, } diff --git a/internal/router/optional_auth_strict_coverage_test.go b/internal/router/optional_auth_strict_coverage_test.go new file mode 100644 index 00000000..d637ad46 --- /dev/null +++ b/internal/router/optional_auth_strict_coverage_test.go @@ -0,0 +1,148 @@ +// optional_auth_strict_coverage_test.go — registry-iterating regression test +// for the "anonymous-capable mutating endpoint MUST 401 on a malformed bearer" +// contract (T19 P1-7, MR-P1-38; rule 18: registry-iterating tests, not +// hand-typed lists). +// +// History: +// +// - 2026-05-20: T19 P1-7 migrated /db/new, /vector/new, /cache/new, +// /nosql/new, /queue/new, /storage/new, /webhook/new from bare +// OptionalAuth to OptionalAuthStrict so an agent presenting an +// expired/typo'd bearer header sees a 401 instead of silently +// getting anonymous-tier provisioning. +// - 2026-05-21: H46 F1 followed up on /storage/:token/presign for the +// same reason. +// - 2026-05-30: this file. /stacks/new + DELETE /stacks/:slug were the +// two remaining single-site-fallacy misses (rule 17 surface). This +// test iterates the live route list so the next time a new +// anonymous-capable mutating endpoint ships, it is verified to be on +// the strict variant — not by reading the router source, but by +// replaying a malformed bearer at the route itself. +// +// Design: +// +// A malformed bearer ("Bearer not-a-jwt") on a strict route MUST +// produce 401 BEFORE the handler runs. A missing bearer header on the +// same route must still pass through to the handler (the routes are +// anonymous-capable). The test asserts both wire shapes per route, so +// a future drop-back to bare OptionalAuth fails this test loudly +// regardless of how the route is registered. +package router_test + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "instant.dev/internal/email" + "instant.dev/internal/plans" + "instant.dev/internal/router" + "instant.dev/internal/testhelpers" +) + +// anonymousCapableMutatingRoutes is the registry of routes that: +// +// 1. accept anonymous callers (no Authorization header is OK), AND +// 2. mutate state (POST/DELETE/PATCH/PUT — never GET). +// +// Every entry MUST be wired with middleware.OptionalAuthStrict (not bare +// OptionalAuth) per T19 P1-7 / MR-P1-38: a present-but-malformed bearer +// header is an agent typo / stale token, and silently downgrading to the +// anonymous tier gives no signal to the caller. +// +// Adding a new anonymous-capable mutating endpoint? Add it here AND wire +// OptionalAuthStrict in router.go. The test below will fail loudly if +// the chain is wrong. +var anonymousCapableMutatingRoutes = []struct { + method string + path string +}{ + {"POST", "/db/new"}, + {"POST", "/vector/new"}, + {"POST", "/cache/new"}, + {"POST", "/nosql/new"}, + {"POST", "/queue/new"}, + {"POST", "/storage/new"}, + {"POST", "/webhook/new"}, + {"POST", "/stacks/new"}, + // DELETE /stacks/:slug — anonymous stacks own their slug as a secret; + // a bad bearer here used to silently downgrade and (after the slug + // lookup) delete the anonymous stack if the slug happened to match. + {"DELETE", "/stacks/anonymous-slug-does-not-exist"}, + // POST /storage/:token/presign — H46 F1 (2026-05-21). Same contract: + // strict mode keeps a stale session from signing for an unowned + // tenant prefix. + {"POST", "/storage/some-token/presign"}, +} + +// TestRouter_AnonymousMutatingRoutes_StrictBearer iterates the registry +// above and asserts that every entry rejects a malformed bearer with 401 +// (the OptionalAuthStrict contract). This is a rule-18 registry-driven +// test: a future drop-back to bare OptionalAuth on any one of these +// routes fails here regardless of how the router source happens to be +// arranged. +func TestRouter_AnonymousMutatingRoutes_StrictBearer(t *testing.T) { + db, dbClean := testhelpers.SetupTestDB(t) + defer dbClean() + rdb, rdbClean := testhelpers.SetupTestRedis(t) + defer rdbClean() + + cfg := newRouterTestConfig() + cfg.Environment = "production" + // Storage provider must boot so /storage/new and /storage/:token/presign + // are registered. shared-key + AllowSharedKey=true reuses the T3 + // success-branch setup from router_coverage_test.go. + cfg.ObjectStoreEndpoint = "do-spaces.example.com" + cfg.ObjectStoreMode = "shared-key" + cfg.ObjectStoreAllowSharedKey = true + cfg.ObjectStoreAccessKey = "AKIATEST" + cfg.ObjectStoreSecretKey = "secret-32-bytes-long-padded-here-okay!" + cfg.ObjectStoreBucket = "instant-shared-test" + cfg.ObjectStoreSecure = true + + mailer := email.NewNoop() + planReg := plans.Default() + + app, _ := router.NewWithHooks(cfg, db, rdb, nil, mailer, planReg, nil, nil) + require.NotNil(t, app) + + for _, r := range anonymousCapableMutatingRoutes { + t.Run(r.method+" "+r.path, func(t *testing.T) { + // Probe 1: malformed bearer → 401. This is the strict-mode + // contract. The exact 401 reason (malformed/expired/etc.) + // is asserted in middleware/auth_test.go; here we only care + // that the route does NOT silently downgrade to anonymous. + req := httptest.NewRequest(r.method, r.path, nil) + req.Header.Set("Authorization", "Bearer this-is-not-a-jwt") + resp, err := app.Test(req, 5_000) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equalf(t, http.StatusUnauthorized, resp.StatusCode, + "%s %s must 401 on a malformed bearer (OptionalAuthStrict); "+ + "got %d. If you added this route with bare OptionalAuth, "+ + "swap to OptionalAuthStrict — see router.go comment "+ + "above the /db/new line for the rationale.", + r.method, r.path, resp.StatusCode) + + // Probe 2: no Authorization header at all → must NOT 401. + // The routes are explicitly anonymous-capable; the strict + // variant only triggers when a header is PRESENT but bad. + // We accept any non-401 status — the handler downstream + // may 4xx for a missing body / unknown slug / etc., but + // that proves the middleware chain let the request through. + req2 := httptest.NewRequest(r.method, r.path, nil) + resp2, err := app.Test(req2, 5_000) + require.NoError(t, err) + defer resp2.Body.Close() + assert.NotEqualf(t, http.StatusUnauthorized, resp2.StatusCode, + "%s %s must NOT 401 when no Authorization header is sent "+ + "(routes are anonymous-capable); got %d. If you tightened "+ + "this route to require auth, remove it from the "+ + "anonymousCapableMutatingRoutes registry above.", + r.method, r.path, resp2.StatusCode) + }) + } +} diff --git a/internal/router/router.go b/internal/router/router.go index 0f9fea87..499821c9 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -634,8 +634,13 @@ func NewWithHooks(cfg *config.Config, db *sql.DB, rdb *redis.Client, geoDbs *mid // boundary for the anonymous case; strict mode ensures a caller who // *thinks* they're authenticated but presents a stale session // doesn't sign for an unowned tenant prefix. + // + // 2026-05-30: the H46 F1 fix landed in the comment but not in the + // chain (the route was still using bare OptionalAuth) — caught by + // the registry-iterating regression test in + // optional_auth_strict_coverage_test.go. Now matches the comment. app.Post("/storage/:token/presign", - middleware.OptionalAuth(cfg), + middleware.OptionalAuthStrict(cfg), middleware.PresignTokenRateLimit(rdb), middleware.Idempotency(rdb, "storage.presign"), storageH.PresignStorage, @@ -703,7 +708,7 @@ func NewWithHooks(cfg *config.Config, db *sql.DB, rdb *redis.Client, geoDbs *mid deployGroup.Post("/:id/redeploy", deployH.Redeploy) // Stacks — Phase 6 multi-service. - // New/Get/Logs/Delete use OptionalAuth (anonymous stacks supported, same as /db/new etc.). + // New/Get/Logs/Delete are anonymous-capable (same model as /db/new etc.). // UpdateEnv/Redeploy require auth (mutations on owned stacks). // RequireWritable rejects impersonated sessions on all mutating // stack endpoints (POST/PATCH/DELETE) so an admin viewing the @@ -711,10 +716,24 @@ func NewWithHooks(cfg *config.Config, db *sql.DB, rdb *redis.Client, geoDbs *mid // Idempotency middleware on /stacks/new + /stacks/:slug/redeploy // covers accidental double-clicks / agent retries the same way it // does for /deploy/new (multipart-aware fingerprint) and /db/new etc. - app.Post("/stacks/new", middleware.OptionalAuth(cfg), middleware.RequireWritable(), middleware.Idempotency(rdb, "stacks.new"), stackH.New) + // + // MUTATING routes (POST/DELETE) use OptionalAuthStrict for the same + // reason as /db/new etc. (T19 P1-7, 2026-05-20): a present-but-bad + // bearer header returns 401 instead of silently downgrading the + // caller to anonymous-tier provisioning. /stacks/new + DELETE + // /stacks/:slug were missed in the original strict-mode wave — this + // closes the surface (rule 17 / MR-P1-38 follow-up). A missing + // Authorization header still passes through as anonymous, since the + // routes are explicitly anonymous-capable. + // + // READ routes (GET) intentionally stay non-strict: a logged-out tab + // reading /stacks/:slug (e.g. follow-up after revocation) should not + // 401 the page — it should serve the anonymous read view if the slug + // belongs to an anonymous stack, or 404 otherwise. + app.Post("/stacks/new", middleware.OptionalAuthStrict(cfg), middleware.RequireWritable(), middleware.Idempotency(rdb, "stacks.new"), stackH.New) app.Get("/stacks/:slug", middleware.OptionalAuth(cfg), stackH.Get) app.Get("/stacks/:slug/logs/:svc", middleware.OptionalAuth(cfg), stackH.Logs) - app.Delete("/stacks/:slug", middleware.OptionalAuth(cfg), middleware.RequireWritable(), stackH.Delete) + app.Delete("/stacks/:slug", middleware.OptionalAuthStrict(cfg), middleware.RequireWritable(), stackH.Delete) app.Patch("/stacks/:slug/env", middleware.RequireAuth(cfg), middleware.RequireWritable(), stackH.UpdateEnv) app.Post("/stacks/:slug/redeploy", middleware.RequireAuth(cfg), middleware.RequireWritable(), middleware.Idempotency(rdb, "stacks.redeploy"), stackH.Redeploy) diff --git a/internal/testhelpers/testhelpers.go b/internal/testhelpers/testhelpers.go index 60f7c8ca..907c5157 100644 --- a/internal/testhelpers/testhelpers.go +++ b/internal/testhelpers/testhelpers.go @@ -1066,9 +1066,13 @@ func NewTestAppWithServices(t *testing.T, db *sql.DB, rdb *redis.Client, service // B17-P0 (BugBash 2026-05-20): broker-mode presign with the production // middleware chain so handler-level tests see the same guarantees as // production callers. See internal/router/router.go for the wiring - // rationale (OptionalAuth → PresignTokenRateLimit → Idempotency). + // rationale (OptionalAuthStrict → PresignTokenRateLimit → Idempotency). + // 2026-05-30: switched to OptionalAuthStrict to mirror production after + // the H46 F1 fix landed in router.go (the comment had said strict but + // the chain was bare). Handler-level tests need the strict variant or + // they would falsely pass while prod rejects a bad bearer. app.Post("/storage/:token/presign", - middleware.OptionalAuth(cfg), + middleware.OptionalAuthStrict(cfg), middleware.PresignTokenRateLimit(rdb), middleware.Idempotency(rdb, "storage.presign"), storageH.PresignStorage,