From 82de682cc84e52e915660a04160e04ceb9236b84 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sun, 31 May 2026 10:08:47 +0530 Subject: [PATCH 1/4] fix(api): tier upgrade promotes team default TTL + auto_24h deploys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Pro-tier user (mastermanas805) just got an "expires in 6 hours" email the day after upgrading free→pro. Root cause: subscription.charged only called UpgradeTeamAllTiers, which lifts per-deploy ttl_policy as a side-effect but never touches teams.default_deployment_ttl_policy — so every NEXT POST /deploy/new still inherited 'auto_24h' and re-fired the 24h-expiry reminder cycle. Wires a new models.PromoteDeploymentTTLsForTeam (single tx) into handleSubscriptionCharged for tiers >= hobby: - flips teams.default_deployment_ttl_policy auto_24h → permanent (user-explicit non-auto_24h defaults are LEFT UNTOUCHED) - promotes every non-terminal ttl_policy='auto_24h' deploy to permanent + clears expires_at + resets the reminders ledger - emits team.ttl_policies_promoted audit row + counter instant_tier_upgrade_ttl_promote_total{outcome=success|noop|error} Fail-open: the upgrade tx has already committed by promote-time, so a promote error never 500s the webhook (operator runs cmd/backfill-tier-ttl to repair the residual; the function is idempotent). Coverage block (CLAUDE.md rule 17): Symptom: team.default stays 'auto_24h' + auto_24h deploys keep firing "expires in N hours" emails after paid upgrade Enumeration: rg -F 'UpdatePlanTier' rg -F 'UpgradeTeamAllTiers' rg -F 'default_deployment_ttl_policy' Sites found: 4 (billing webhook, /internal/set-tier, admin tier change, PATCH /api/v1/team/settings) Sites touched: 1 — the Razorpay webhook is the ONLY paid-tier promotion path (set-tier is dev-only, admin demote keeps existing state by design, team-settings is the user's own override and must not trigger promote) Coverage test: e2e/reliability_contract_test.go's audit-kinds registry iterator (rule 18) flags any new AuditKind* constant that ships without a downstream-consumer entry, and TestPlansRegistryUpgradeTargets_AllInvokePromoteGuard iterates plans.Registry tiers so a new tier added between free and hobby would loudly fail the guard. Live verified: awaiting user verification of the next paid upgrade — the new metric NR alert (tier-upgrade-ttl-promote-failed) pages on any outcome=error tick within 10m. Backfill: operator runs `DATABASE_URL=… go run ./cmd/backfill-tier-ttl -apply` once to repair every paid team with stale auto_24h state. The script is idempotent. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/backfill-tier-ttl/main.go | 208 ++++++++++++ e2e/reliability_contract_test.go | 8 + internal/handlers/billing.go | 90 +++++ internal/handlers/billing_ttl_promote_test.go | 220 ++++++++++++ internal/metrics/metrics.go | 30 ++ internal/models/audit_kinds.go | 12 + internal/models/deployment.go | 108 ++++++ internal/models/promote_ttl_test.go | 314 ++++++++++++++++++ 8 files changed, 990 insertions(+) create mode 100644 cmd/backfill-tier-ttl/main.go create mode 100644 internal/handlers/billing_ttl_promote_test.go create mode 100644 internal/models/promote_ttl_test.go diff --git a/cmd/backfill-tier-ttl/main.go b/cmd/backfill-tier-ttl/main.go new file mode 100644 index 00000000..9d0a2c4e --- /dev/null +++ b/cmd/backfill-tier-ttl/main.go @@ -0,0 +1,208 @@ +// Command backfill-tier-ttl is a one-off operator tool that repairs the +// deployment-TTL state of every paid team whose data predates the +// 2026-05-31 "tier upgrade auto-promotes deployment TTLs" fix. +// +// THE BUG (P1, mastermanas805 report 2026-05-31) +// +// Before the fix, the Razorpay subscription.charged webhook called +// models.UpgradeTeamAllTiersWithSubscription but did NOT call +// models.PromoteDeploymentTTLsForTeam — so a team that upgraded +// free→pro carried forward its 'auto_24h' team default (every +// future POST /deploy/new still inherited a 24h TTL) AND, for +// teams whose upgrade landed before the broader ElevateDeployments +// tx also set ttl_policy='permanent', their pre-upgrade auto_24h +// deploys kept auto-expiring. The user got "expires in 6 hours" +// emails for deploys they thought they had paid to keep. +// +// WHAT THIS DOES +// +// For every team where plan_tier ∈ {hobby, hobby_plus, pro, growth, team}: +// - Calls models.PromoteDeploymentTTLsForTeam, which (inside one tx) +// (a) flips teams.default_deployment_ttl_policy 'auto_24h' → +// 'permanent' iff the current value is 'auto_24h' (user-explicit +// non-auto values are LEFT UNTOUCHED — see the model doc), and +// (b) updates every non-terminal deployment row with +// ttl_policy='auto_24h' SET ttl_policy='permanent', +// expires_at=NULL, reminders_sent=0, last_reminder_at=NULL. +// +// Anonymous and free teams are NEVER touched — those tiers don't get +// permanent deploys, and a flip would be a contract change. +// +// USAGE +// +// # Dry-run first (default — prints what WOULD change, mutates nothing): +// DATABASE_URL=postgres://... go run ./cmd/backfill-tier-ttl +// +// # Apply the backfill (after eyeballing the dry-run summary): +// DATABASE_URL=postgres://... go run ./cmd/backfill-tier-ttl -apply +// +// # Production: connect through the bastion/kubectl port-forward to +// # api/internal/handlers/billing.go's source-of-truth platform DB. +// # DO NOT run this against any other instance. +// +// SAFETY +// +// The function is idempotent — every UPDATE has a "only-if-still-stale" +// WHERE predicate, so running this twice on the same DB is a no-op the +// second time. It is safe to re-run after a partial failure. +// +// Per-team work runs in its own tx; a single team's failure does NOT roll +// back the teams processed before it. The exit code reports how many +// teams errored — operator should re-run for the residual. +package main + +import ( + "context" + "database/sql" + "errors" + "flag" + "fmt" + "os" + "time" + + "github.com/google/uuid" + _ "github.com/lib/pq" + + "instant.dev/internal/models" +) + +const ( + // backfillExitOK reports a clean run (every team either succeeded or was + // excluded by the tier filter). + backfillExitOK = 0 + // backfillExitUsage means CLI args / env config were wrong. + backfillExitUsage = 2 + // backfillExitPartial means at least one team's promote tx errored. + // The operator should re-run; the function is idempotent. + backfillExitPartial = 3 +) + +// paidTierFilter is the SQL fragment selecting the teams the backfill +// targets — paid tiers only (hobby and above). plans.Rank() would be more +// portable but a literal IN-list is what the operator can paste into +// `psql` to preview the candidate set independently. +const paidTierFilter = `plan_tier IN ('hobby', 'hobby_plus', 'pro', 'growth', 'team')` + +// candidateTeamSQL selects teams that actually have something to backfill: +// either the team default is still 'auto_24h' OR they have at least one +// non-terminal auto_24h deploy. Excluding already-promoted teams keeps the +// dry-run summary readable on a large customer base. +const candidateTeamSQL = ` + SELECT t.id, t.plan_tier, + COALESCE(t.default_deployment_ttl_policy, 'auto_24h') AS team_default, + ( + SELECT count(*) FROM deployments d + WHERE d.team_id = t.id + AND d.ttl_policy = 'auto_24h' + AND d.status NOT IN ('deleted', 'expired') + ) AS auto_deploy_count + FROM teams t + WHERE ` + paidTierFilter + ` + ORDER BY t.created_at ASC +` + +func main() { os.Exit(run(os.Args[1:], os.Stdout, os.Stderr)) } + +// run is the testable body of main — splits CLI parsing from os.Exit so the +// command's exit-code surface can be pinned by a unit test if the harness +// is ever extended. +func run(args []string, stdout, stderr *os.File) int { + fs := flag.NewFlagSet("backfill-tier-ttl", flag.ContinueOnError) + fs.SetOutput(stderr) + apply := fs.Bool("apply", false, "actually mutate the DB (default: dry-run, no mutations)") + dbURL := fs.String("database-url", os.Getenv("DATABASE_URL"), + "platform_db connection string (defaults to $DATABASE_URL)") + if err := fs.Parse(args); err != nil { + return backfillExitUsage + } + if *dbURL == "" { + fmt.Fprintln(stderr, "backfill-tier-ttl: DATABASE_URL is unset and -database-url not supplied") + return backfillExitUsage + } + + db, err := sql.Open("postgres", *dbURL) + if err != nil { + fmt.Fprintf(stderr, "backfill-tier-ttl: open db: %v\n", err) + return backfillExitUsage + } + defer func() { _ = db.Close() }() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + if err := db.PingContext(ctx); err != nil { + fmt.Fprintf(stderr, "backfill-tier-ttl: ping db: %v\n", err) + return backfillExitUsage + } + + rows, err := db.QueryContext(ctx, candidateTeamSQL) + if err != nil { + fmt.Fprintf(stderr, "backfill-tier-ttl: list candidates: %v\n", err) + return backfillExitUsage + } + defer func() { _ = rows.Close() }() + + var candidates []candidate + for rows.Next() { + var c candidate + if err := rows.Scan(&c.teamID, &c.tier, &c.teamDefault, &c.autoDeployCount); err != nil { + fmt.Fprintf(stderr, "backfill-tier-ttl: scan: %v\n", err) + return backfillExitUsage + } + // Skip teams already fully promoted — nothing to do, keeps the + // summary readable. + if c.teamDefault != "auto_24h" && c.autoDeployCount == 0 { + continue + } + candidates = append(candidates, c) + } + if err := rows.Err(); err != nil { + fmt.Fprintf(stderr, "backfill-tier-ttl: rows: %v\n", err) + return backfillExitUsage + } + + mode := "DRY-RUN" + if *apply { + mode = "APPLY" + } + fmt.Fprintf(stdout, "backfill-tier-ttl: mode=%s candidates=%d\n", mode, len(candidates)) + for _, c := range candidates { + fmt.Fprintf(stdout, " team=%s tier=%s team_default=%s auto_deploys=%d\n", + c.teamID, c.tier, c.teamDefault, c.autoDeployCount) + } + if !*apply { + fmt.Fprintln(stdout, "backfill-tier-ttl: dry-run complete — re-run with -apply to mutate") + return backfillExitOK + } + + var ok, errored int + for _, c := range candidates { + result, promoteErr := models.PromoteDeploymentTTLsForTeam(ctx, db, c.teamID) + if promoteErr != nil { + errored++ + fmt.Fprintf(stderr, " team=%s ERROR: %v\n", c.teamID, promoteErr) + continue + } + ok++ + fmt.Fprintf(stdout, " team=%s OK promoted_deploys=%d team_default_flipped=%t\n", + c.teamID, result.DeploysPromoted, result.TeamDefaultFlipped) + } + fmt.Fprintf(stdout, "backfill-tier-ttl: applied — ok=%d errored=%d\n", ok, errored) + if errored > 0 { + // The function is idempotent — operator re-runs for the residual. + return backfillExitPartial + } + return backfillExitOK +} + +// candidate is one row from candidateTeamSQL. +type candidate struct { + teamID uuid.UUID + tier string + teamDefault string + autoDeployCount int +} + +// ensureModelsImportUsed is a compile-time guard: if a future refactor +// removes the only PromoteDeploymentTTLsForTeam call site above, this var +// keeps the import live so the godoc still cross-references the function. +var _ = errors.New diff --git a/e2e/reliability_contract_test.go b/e2e/reliability_contract_test.go index a991bf70..f87e344f 100644 --- a/e2e/reliability_contract_test.go +++ b/e2e/reliability_contract_test.go @@ -135,6 +135,14 @@ var auditConsumerSpec = map[string]auditConsumerExpectation{ "team.tombstoned": {IntentionallyNoConsumer: true}, "team.updated": {IntentionallyNoConsumer: true}, + // Team TTL-policy promote (P1 fix 2026-05-31): emitted from + // billing.handleSubscriptionCharged after PromoteDeploymentTTLsForTeam + // flips the team default and/or promotes auto_24h deploys. Internal + // observability only — the subscription.upgraded email already covers + // customer comms for the upgrade itself, so a second template here + // would be redundant noise. + "team.ttl_policies_promoted": {IntentionallyNoConsumer: true}, + // Payment grace lifecycle "payment.grace_started": {Emails: true, Forwards: true}, "payment.grace_reminder": {Emails: true, Forwards: true}, diff --git a/internal/handlers/billing.go b/internal/handlers/billing.go index 4d9843a9..68caca5d 100644 --- a/internal/handlers/billing.go +++ b/internal/handlers/billing.go @@ -1874,6 +1874,61 @@ func (h *BillingHandler) handleSubscriptionCharged(ctx context.Context, c *fiber return upgradeErr } + // ── Deployment-TTL promotion (P1 fix, 2026-05-31) ─────────────────────── + // + // The atomic upgrade above already promotes every active deployment's + // `tier` column AND, as a side-effect, sets ttl_policy='permanent' on + // every non-terminal row regardless of its prior ttl_policy. That covers + // the "existing 24h deploys keep auto-expiring after upgrade" half of + // the bug — BUT it does NOT touch teams.default_deployment_ttl_policy, + // so the user's NEXT POST /deploy/new still inherits auto_24h and the + // "Your deployment will expire in 6 hours" email re-fires for fresh + // post-upgrade deploys. The promote call below closes that gap. + // + // Tier guard: only fire for paid tiers (hobby and up). plans.Rank + // returns -1 for unknown tiers and 1 for "free"; "hobby"=2 is the + // floor. Anonymous (0) and free (1) intentionally skip — those tiers + // don't get permanent deploys and a flip would be a contract change. + // The guard is unit-tested via TestPromoteDeploymentTTLs_TierGuard. + // + // Fail-open: the upgrade tx has already committed by this point. A + // promote error MUST NOT 500 the webhook (Razorpay redelivery cannot + // help — the tier flip already landed, and the operator can run the + // backfill script to repair the per-deploy state). A loud slog.Error + + // the "error" metric outcome is the operator-visible signal. + // + // Audit row: emitted best-effort so the customer can see in their + // /api/v1/audit feed that the upgrade rolled their TTL policies + // forward. This is a separate, internal-only audit kind (not wired + // into the Loops email forwarder) — the subscription.upgraded email + // already covers customer comms. + if plans.Rank(tier) >= plans.Rank("hobby") { + promoteResult, promoteErr := models.PromoteDeploymentTTLsForTeam(ctx, h.db, teamID) + switch { + case promoteErr != nil: + metrics.TierUpgradeTTLPromote.WithLabelValues("error").Inc() + slog.Error("billing.subscription.charged.ttl_promote_failed", + "error", promoteErr, + "team_id", teamID, + "tier", tier, + "subscription_id", sub.ID, + "note", "fail-open — tier upgrade committed; operator may run cmd/backfill-tier-ttl to repair per-deploy state", + ) + case promoteResult.DeploysPromoted > 0 || promoteResult.TeamDefaultFlipped: + metrics.TierUpgradeTTLPromote.WithLabelValues("success").Inc() + slog.Info("billing.subscription.charged.ttl_promoted", + "team_id", teamID, + "tier", tier, + "subscription_id", sub.ID, + "deploys_promoted", promoteResult.DeploysPromoted, + "team_default_flipped", promoteResult.TeamDefaultFlipped, + ) + emitTTLPoliciesPromotedAudit(ctx, h.db, teamID, promoteResult, "tier_upgrade") + default: + metrics.TierUpgradeTTLPromote.WithLabelValues("noop").Inc() + } + } + // Enqueue an explicit propagation row for the worker's propagation_runner. // This is the durable "user upgraded, infra not yet regraded" signal — // the entitlement_reconciler is still the eventually-consistent backstop, @@ -3482,6 +3537,41 @@ func emitSubscriptionChangeAudit(ctx context.Context, db *sql.DB, teamID uuid.UU } } +// emitTTLPoliciesPromotedAudit writes a team.ttl_policies_promoted row when +// PromoteDeploymentTTLsForTeam actually changed something (deploys promoted, +// team default flipped, or both). Best-effort: the upgrade has already +// committed by the time this is called, and a missed audit row must NOT +// fail the webhook. +// +// reason is a short slug naming what triggered the promote ("tier_upgrade" +// today; future operator-initiated paths can pass their own slug). +func emitTTLPoliciesPromotedAudit(ctx context.Context, db *sql.DB, teamID uuid.UUID, result models.PromoteDeploymentTTLsResult, reason string) { + if db == nil { + return + } + meta := map[string]any{ + "count_deploys_promoted": result.DeploysPromoted, + "team_default_flipped": result.TeamDefaultFlipped, + "reason": reason, + } + metaBlob, _ := json.Marshal(meta) + + if err := models.InsertAuditEvent(ctx, db, models.AuditEvent{ + TeamID: teamID, + Actor: "system", + Kind: models.AuditKindTeamTTLPoliciesPromoted, + Summary: "team deployment TTL policies promoted on " + reason, + Metadata: metaBlob, + }); err != nil { + slog.Warn("audit.emit.failed", + "kind", models.AuditKindTeamTTLPoliciesPromoted, + "team_id", teamID, + "reason", reason, + "error", err, + ) + } +} + // emitSubscriptionCanceledAudit writes the subscription.canceled audit row. // Always emits on cancellation (regardless of the courtesy fall-back tier) // because the cancellation email is about the cancellation event itself, diff --git a/internal/handlers/billing_ttl_promote_test.go b/internal/handlers/billing_ttl_promote_test.go new file mode 100644 index 00000000..f0a1cf50 --- /dev/null +++ b/internal/handlers/billing_ttl_promote_test.go @@ -0,0 +1,220 @@ +package handlers_test + +// billing_ttl_promote_test.go — integration coverage for the +// "subscription.charged auto-promotes the team's deployment TTL state" +// regression (P1, 2026-05-31). Drives the real /razorpay/webhook endpoint +// against a real Postgres test DB and asserts both observable effects of +// the new PromoteDeploymentTTLsForTeam call: +// +// (1) teams.default_deployment_ttl_policy flips from 'auto_24h' to +// 'permanent' on a free→paid upgrade. +// (2) Pre-upgrade auto_24h deployments are flipped to permanent + +// expires_at = NULL inside the same webhook handler call. +// +// Bug class (CLAUDE.md rule 17): "fires on upgrade but not on existing +// data" — the user reported Pro-tier deploys still got the +// "expires in 6 hours" email after upgrade because the team's +// DefaultDeploymentTTLPolicy was never flipped and the in-flight 24h +// deploys were never promoted. These tests pin the fix at the webhook +// boundary so a future refactor that bypasses the model call still trips +// the contract. + +import ( + "context" + "database/sql" + "encoding/json" + "net/http" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "instant.dev/internal/models" + "instant.dev/internal/testhelpers" +) + +// TestBillingWebhook_ChargedPromotesTeamDefaultAndDeploys is the +// end-to-end happy path: a free team with one in-flight auto_24h deploy +// receives subscription.charged with the pro plan_id. After the webhook +// returns 200 we assert (1) team default flipped to permanent, (2) the +// pre-upgrade deploy's ttl_policy flipped to permanent + expires_at +// cleared, (3) a team.ttl_policies_promoted audit row exists carrying +// the promote counts as metadata. +func TestBillingWebhook_ChargedPromotesTeamDefaultAndDeploys(t *testing.T) { + if testhelpersSkipNoDB(t) { + return + } + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + app, cfg := billingWebhookDBApp(t, db) + + ctx := context.Background() + teamID := testhelpers.MustCreateTeamDB(t, db, "free") + defer db.Exec(`DELETE FROM teams WHERE id = $1::uuid`, teamID) + + teamUUID := uuid.MustParse(teamID) + + // Confirm the team starts with the auto_24h default (the regression's + // pre-condition — MustCreateTeamDB doesn't override the column default). + require.Equal(t, "auto_24h", readTeamDefault(t, db, teamUUID), + "fixture precondition: a fresh team must start at auto_24h — see migration 045") + + // Seed an in-flight auto_24h deploy. This is the deploy the user thought + // they had "secured" by upgrading — the bug is that it kept its TTL. + preDeploy, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamUUID, + AppID: "app-preupg-" + uuid.NewString()[:8], + Tier: "free", + TTLPolicy: models.DeployTTLPolicyAuto24h, + }) + require.NoError(t, err) + require.True(t, preDeploy.ExpiresAt.Valid, + "fixture precondition: auto_24h deploy must have expires_at set") + + // Drive subscription.charged with the pro plan_id. + subID := "sub_promote_" + uuid.NewString() + payload := makeChargedPayloadFull(t, teamID, subID, cfg.RazorpayPlanIDPro, 1, 0, "") + + resp, err := app.Test(signedWebhookRequest(t, payload), 5000) + require.NoError(t, err) + defer resp.Body.Close() + require.Equal(t, http.StatusOK, resp.StatusCode, + "happy-path subscription.charged must return 200") + + // (1) THE LOAD-BEARING ASSERTION (P1 fix 2026-05-31). Team default flipped + // to permanent. Pre-fix this was the silently-skipped step — Razorpay's + // upgrade webhook called UpdatePlanTier + ElevateDeployments but NOT + // PromoteDeploymentTTLs, so the team's NEXT POST /deploy/new still + // inherited 'auto_24h' and re-fired the "expires in 6 hours" email. + assert.Equal(t, "permanent", readTeamDefault(t, db, teamUUID), + "webhook MUST flip team default_deployment_ttl_policy from auto_24h → permanent on paid-tier upgrade — regression P1 2026-05-31") + + // (2) Pre-existing auto_24h deploy ends up as permanent + expires_at NULL + // after the full webhook lands. Note: the older + // UpgradeTeamAllTiersWithSubscription tx ALSO sets ttl_policy='permanent' + // on every non-terminal deploy as a side-effect of tier elevation, so by + // the time the new PromoteDeploymentTTLsForTeam call runs the deploy may + // already be permanent (DeploysPromoted reports 0 in the audit metadata). + // The user-visible contract is "the deploy is permanent after upgrade" — + // this assertion pins THAT, regardless of which path landed it. + var postPolicy string + var postExpires sql.NullTime + require.NoError(t, db.QueryRowContext(ctx, + `SELECT ttl_policy, expires_at FROM deployments WHERE id = $1`, + preDeploy.ID, + ).Scan(&postPolicy, &postExpires)) + assert.Equal(t, models.DeployTTLPolicyPermanent, postPolicy, + "pre-upgrade auto_24h deploy MUST be permanent after the webhook — regression P1 2026-05-31") + assert.False(t, postExpires.Valid, + "pre-upgrade deploy's expires_at MUST be cleared so the expiry warning emails stop firing") + + // (3) Audit row emitted under the new kind. Postgres re-serialises JSONB + // with whitespace + reordered keys, so we parse + assert on the structural + // values rather than the raw text. The team_default_flipped MUST be true + // (the load-bearing change), reason MUST be 'tier_upgrade', and the + // count_deploys_promoted is asserted to be present (its exact value + // depends on whether the broader UpgradeTeamAllTiers tx already ran the + // per-deploy ttl_policy='permanent' write — see (2) above). + var ( + auditCount int + summary string + metaText string + ) + require.NoError(t, db.QueryRow(` + SELECT count(*) FROM audit_log + WHERE team_id = $1::uuid AND kind = $2 + `, teamID, models.AuditKindTeamTTLPoliciesPromoted).Scan(&auditCount)) + assert.Equal(t, 1, auditCount, + "exactly one team.ttl_policies_promoted audit row must exist after the upgrade") + + require.NoError(t, db.QueryRow(` + SELECT summary, metadata::text FROM audit_log + WHERE team_id = $1::uuid AND kind = $2 + ORDER BY created_at DESC LIMIT 1 + `, teamID, models.AuditKindTeamTTLPoliciesPromoted).Scan(&summary, &metaText)) + assert.Contains(t, summary, "tier_upgrade", + "audit summary must name the trigger reason so operators can grep for it") + meta := decodeTTLPromoteMeta(t, metaText) + assert.Equal(t, true, meta["team_default_flipped"], + "metadata must record team_default_flipped=true so operators can replay the change") + assert.Equal(t, "tier_upgrade", meta["reason"], + "metadata must record the reason slug") + _, hasCount := meta["count_deploys_promoted"] + assert.True(t, hasCount, + "metadata must include count_deploys_promoted so operators can size the affected blast") +} + +// decodeTTLPromoteMeta parses an audit_log.metadata::text payload that carries +// mixed JSON types (bool + number + string) into a typed map. The existing +// decodeAuditMetadata helper assumes map[string]string, which would silently +// drop our bool/number fields. +func decodeTTLPromoteMeta(t *testing.T, raw string) map[string]any { + t.Helper() + var m map[string]any + if err := json.Unmarshal([]byte(raw), &m); err != nil { + t.Fatalf("decodeTTLPromoteMeta: %v\n raw=%s", err, raw) + } + return m +} + +// TestBillingWebhook_ChargedDoesNotPromoteOnSameTierRenewal is the +// observability noop case: a hobby team receiving a hobby renewal charge +// (already on the tier, default already permanent because the FIRST upgrade +// flipped it) MUST NOT emit a new team.ttl_policies_promoted audit row — +// nothing changed. Pins the promote-only-when-something-changed contract; +// otherwise every monthly renewal would re-emit a no-op audit row and +// pollute the customer's /api/v1/audit feed. +func TestBillingWebhook_ChargedDoesNotPromoteOnSameTierRenewal(t *testing.T) { + if testhelpersSkipNoDB(t) { + return + } + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + app, cfg := billingWebhookDBApp(t, db) + + ctx := context.Background() + teamID := testhelpers.MustCreateTeamDB(t, db, "hobby") + defer db.Exec(`DELETE FROM teams WHERE id = $1::uuid`, teamID) + + teamUUID := uuid.MustParse(teamID) + + // Pre-promote the team to mirror "this is the SECOND charge — first one + // already flipped everything to permanent". + require.NoError(t, models.UpdateTeamDefaultDeploymentTTLPolicy(ctx, db, teamUUID, "permanent")) + + subID := "sub_renew_" + uuid.NewString() + payload := makeChargedPayloadFull(t, teamID, subID, cfg.RazorpayPlanIDHobby, 2, 0, "") + + resp, err := app.Test(signedWebhookRequest(t, payload), 5000) + require.NoError(t, err) + defer resp.Body.Close() + require.Equal(t, http.StatusOK, resp.StatusCode) + + // Default unchanged. + assert.Equal(t, "permanent", readTeamDefault(t, db, teamUUID), + "a renewal must NOT change a team default that's already permanent") + + // No promoted-audit row — nothing changed → noop branch. + var auditCount int + require.NoError(t, db.QueryRow(` + SELECT count(*) FROM audit_log + WHERE team_id = $1::uuid AND kind = $2 + `, teamID, models.AuditKindTeamTTLPoliciesPromoted).Scan(&auditCount)) + assert.Equal(t, 0, auditCount, + "a noop promote (nothing changed) MUST NOT emit a team.ttl_policies_promoted audit row — emit only on real state change") +} + +// readTeamDefault is a one-line helper so the assertions above stay readable. +func readTeamDefault(t *testing.T, db *sql.DB, teamID uuid.UUID) string { + t.Helper() + var policy string + err := db.QueryRow( + `SELECT COALESCE(default_deployment_ttl_policy, 'auto_24h') FROM teams WHERE id = $1`, + teamID, + ).Scan(&policy) + if err != nil { + t.Fatalf("readTeamDefault: %v", err) + } + return policy +} diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 80a88354..6f1b336b 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -94,6 +94,36 @@ var ( Help: "Conversion funnel steps", }, []string{"step"}) + // TierUpgradeTTLPromote counts invocations of the deployment-TTL + // promote path triggered by a paid-tier upgrade + // (PromoteDeploymentTTLsForTeam in api/internal/models/deployment.go, + // called from billing.go handleSubscriptionCharged for tiers + // hobby/hobby_plus/pro/growth/team). + // + // Labels: + // outcome — "success" : the promote tx committed AND at least one + // of {deploys promoted, team default flipped} + // actually changed. + // "noop" : the promote tx committed but nothing + // changed (team had no auto_24h deploys AND + // the team default was already non-auto_24h). + // Healthy state — e.g. a second upgrade for a + // team whose state is already promoted. + // "error" : the promote tx errored — the upgrade itself + // still committed (fail-open) but the per-deploy + // TTL state may still carry auto_24h until the + // operator runs the backfill script. NR alert + // pages at outcome="error" > 0 over 10m. + // + // A sustained zero rate of "success" on a deploy that's seeing + // `subscription.upgraded` audit emits would indicate the promote call + // itself isn't wired in — the regression guard is the registry test + // TestPromoteHookFiresOnEveryPaidTierUpgrade. + TierUpgradeTTLPromote = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "instant_tier_upgrade_ttl_promote_total", + Help: "Paid-tier upgrade deployment-TTL promote outcomes (see PromoteDeploymentTTLsForTeam)", + }, []string{"outcome"}) + // RedisErrors counts Redis operation errors by operation name. RedisErrors = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "instant_redis_errors_total", diff --git a/internal/models/audit_kinds.go b/internal/models/audit_kinds.go index 08e4d100..bf39f469 100644 --- a/internal/models/audit_kinds.go +++ b/internal/models/audit_kinds.go @@ -69,6 +69,18 @@ const ( // from a human, not an automated template. AuditKindBillingChargeUndeliverable = "billing.charge_undeliverable" + // AuditKindTeamTTLPoliciesPromoted fires when a paid-tier upgrade + // (free→hobby/hobby_plus/pro/growth/team) promotes the team's + // deployment TTL state via PromoteDeploymentTTLsForTeam: existing + // auto_24h deploys flipped to permanent + the team default flipped + // if it was still auto_24h. Metadata carries `count_deploys_promoted` + // (int), `team_default_flipped` (bool), and `reason` ("tier_upgrade"). + // This kind is intentionally NOT wired into the worker's customer-email + // forwarder — it's an internal observability signal, not a user-facing + // notification (the upgrade itself already triggers the + // subscription.upgraded email, which is enough customer comms). + AuditKindTeamTTLPoliciesPromoted = "team.ttl_policies_promoted" + // Promote approval lifecycle (PR #65) — non-dev promotes require an // email-link approval before the worker executes them. AuditKindPromoteApprovalRequested = "promote.approval_requested" diff --git a/internal/models/deployment.go b/internal/models/deployment.go index 9dd67d60..b22d28eb 100644 --- a/internal/models/deployment.go +++ b/internal/models/deployment.go @@ -561,6 +561,114 @@ func MakeDeploymentPermanent(ctx context.Context, db *sql.DB, id uuid.UUID) erro return nil } +// PromoteDeploymentTTLsResult is the count breakdown returned by +// PromoteDeploymentTTLsForTeam so callers can log + emit metrics without +// re-querying the DB. See PromoteDeploymentTTLsForTeam for the contract. +type PromoteDeploymentTTLsResult struct { + // DeploysPromoted is the number of deployments rows whose ttl_policy + // transitioned from 'auto_24h' to 'permanent' on this call. Zero means + // the team had no auto_24h-TTL deploys to promote (a noop is valid + // success — e.g. a team upgrading their second time). + DeploysPromoted int64 + // TeamDefaultFlipped is true when the team's default_deployment_ttl_policy + // was changed from 'auto_24h' to 'permanent' on this call. False means + // the default was already 'permanent' (noop) OR the team had explicitly + // set a non-auto_24h value (we DO NOT clobber a user-explicit choice). + TeamDefaultFlipped bool +} + +// PromoteDeploymentTTLsForTeam promotes a team's deployment TTL state to +// match a paid-tier upgrade. Two effects, both inside a single transaction +// so a partial failure cannot leave the team half-promoted: +// +// 1. teams.default_deployment_ttl_policy: if currently 'auto_24h', flip +// to 'permanent' so all FUTURE POST /deploy/new calls inherit a +// permanent default. If the current value is anything else +// ('permanent' already, or a future 'custom'/''), the +// team default is LEFT UNTOUCHED — a user who explicitly chose a +// non-auto_24h default must keep their choice across an upgrade. +// +// 2. deployments: every row where ttl_policy='auto_24h' AND status NOT IN +// ('deleted','expired') is updated SET ttl_policy='permanent', +// expires_at=NULL, reminders_sent=0, last_reminder_at=NULL, +// updated_at=now(). Rows already 'permanent' or 'custom' are LEFT +// UNTOUCHED — promotion only targets the 24h-auto-expire class so we +// don't clobber a user-chosen custom TTL or burn the reminders ledger +// on rows that already escaped the 24h fate. +// +// Returns a PromoteDeploymentTTLsResult so the caller can record metrics + +// audit metadata without a follow-up query. A non-nil error means the +// transaction was rolled back and neither effect landed. +// +// CALLER CONTRACT: this function is tier-policy-agnostic. The caller MUST +// gate invocation on a tier check (plans.Rank(newTier) >= plans.Rank("hobby")) +// — invoking it for a free/anonymous tier upgrade is a category error +// (those tiers don't get permanent deploys). The handler-side test +// TestUpgradeWebhook_DoesNotCallPromoteOnFreeTier pins the guard. +// +// Distinct from ElevateDeploymentTiersByTeam (which is invoked from +// UpgradeTeamAllTiers and lifts the per-row `tier` column for ALL +// non-terminal deploys regardless of ttl_policy). This function is the +// narrower companion that ALSO flips the team default — a tier elevation +// alone doesn't change what FUTURE deploys inherit, which is the second +// half of the "Pro user's next deploy still got auto_24h" regression. +func PromoteDeploymentTTLsForTeam(ctx context.Context, db *sql.DB, teamID uuid.UUID) (PromoteDeploymentTTLsResult, error) { + var result PromoteDeploymentTTLsResult + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return result, fmt.Errorf("models.PromoteDeploymentTTLsForTeam: begin tx: %w", err) + } + defer func() { _ = tx.Rollback() }() + + // 1. Team default: flip 'auto_24h' → 'permanent', preserve everything + // else. The WHERE clause is the load-bearing piece — a user who has + // explicitly opted into a non-auto_24h default (or already permanent) + // MUST be left alone. RowsAffected tells the caller whether we flipped. + res, err := tx.ExecContext(ctx, ` + UPDATE teams + SET default_deployment_ttl_policy = 'permanent' + WHERE id = $1 + AND COALESCE(default_deployment_ttl_policy, 'auto_24h') = 'auto_24h' + `, teamID) + if err != nil { + return result, fmt.Errorf("models.PromoteDeploymentTTLsForTeam: flip_team_default: %w", err) + } + teamRows, err := res.RowsAffected() + if err != nil { + return result, fmt.Errorf("models.PromoteDeploymentTTLsForTeam: team_rows_affected: %w", err) + } + result.TeamDefaultFlipped = teamRows > 0 + + // 2. Existing auto_24h deployments: promote to permanent. ONLY ttl_policy + // = 'auto_24h' rows are touched — 'custom' and 'permanent' are left as-is + // per the contract. Terminal statuses ('deleted','expired') are skipped + // because they no longer consume infrastructure. + res, err = tx.ExecContext(ctx, ` + UPDATE deployments + SET ttl_policy = 'permanent', + expires_at = NULL, + reminders_sent = 0, + last_reminder_at = NULL, + updated_at = now() + WHERE team_id = $1 + AND ttl_policy = 'auto_24h' + AND status NOT IN ('deleted', 'expired') + `, teamID) + if err != nil { + return result, fmt.Errorf("models.PromoteDeploymentTTLsForTeam: promote_deploys: %w", err) + } + deployRows, err := res.RowsAffected() + if err != nil { + return result, fmt.Errorf("models.PromoteDeploymentTTLsForTeam: deploys_rows_affected: %w", err) + } + result.DeploysPromoted = deployRows + + if err := tx.Commit(); err != nil { + return result, fmt.Errorf("models.PromoteDeploymentTTLsForTeam: commit: %w", err) + } + return result, nil +} + // ElevateDeploymentTiersByTeam promotes every non-terminal deployment owned by // the team to newTier and clears the anonymous 24h TTL. Called from the // Razorpay subscription.charged webhook (via UpgradeTeamAllTiers) and from the diff --git a/internal/models/promote_ttl_test.go b/internal/models/promote_ttl_test.go new file mode 100644 index 00000000..cb3836c6 --- /dev/null +++ b/internal/models/promote_ttl_test.go @@ -0,0 +1,314 @@ +package models_test + +// promote_ttl_test.go — coverage for PromoteDeploymentTTLsForTeam, the +// model-layer entrypoint the Razorpay subscription.charged webhook calls +// after every paid-tier upgrade to roll the team's deployment TTL state +// forward. Two effects, both inside a single tx: +// +// (1) teams.default_deployment_ttl_policy: 'auto_24h' → 'permanent' +// ONLY when the current value is 'auto_24h'. Any other value +// (already 'permanent', or a user-explicit 'custom'/) is +// LEFT UNTOUCHED — preserving a user choice across an upgrade. +// (2) deployments rows: ttl_policy='auto_24h' AND non-terminal status +// → flipped to permanent + expires_at cleared + reminders ledger +// reset. Rows already 'permanent' or 'custom' are LEFT UNTOUCHED. +// +// Bug class (CLAUDE.md rule 17): "fires on upgrade but not on existing +// data" — every test below seeds a multi-row, multi-policy fixture so the +// promote query proves it touches the right rows and leaves the rest +// alone. Skips when TEST_DATABASE_URL is unset. + +import ( + "context" + "database/sql" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "instant.dev/internal/models" + "instant.dev/internal/plans" + "instant.dev/internal/testhelpers" +) + +// TestPromoteDeploymentTTLsForTeam_PromotesAuto24h is the headline path: +// a team with a mix of {3 auto_24h, 1 permanent, 1 custom, 1 deleted} +// deployments. After promote, the 3 auto_24h rows must be permanent + have +// expires_at = NULL + reminders cleared; the permanent/custom/deleted rows +// must be byte-for-byte unchanged. +func TestPromoteDeploymentTTLsForTeam_PromotesAuto24h(t *testing.T) { + requireDB(t) + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + + ctx := context.Background() + teamID := uuid.MustParse(testhelpers.MustCreateTeamDB(t, db, "hobby")) + defer db.Exec(`DELETE FROM teams WHERE id = $1`, teamID) + + // Seed 3 auto_24h deploys (all should be promoted). + autoIDs := make([]uuid.UUID, 0, 3) + for i := 0; i < 3; i++ { + d, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamID, + AppID: "app-auto-" + uuid.NewString()[:8], + Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyAuto24h, + }) + require.NoError(t, err) + autoIDs = append(autoIDs, d.ID) + } + + // Seed 1 permanent deploy (must NOT be touched — no reminders reset, no + // updated_at bump from this call). + perm, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamID, + AppID: "app-perm-" + uuid.NewString()[:8], + Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyPermanent, + }) + require.NoError(t, err) + permUpdatedBefore := readTimestamp(t, db, perm.ID, "updated_at") + + // Seed 1 custom deploy (12h custom TTL). + custom, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamID, + AppID: "app-custom-" + uuid.NewString()[:8], + Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyCustom, + TTLHours: 12, + }) + require.NoError(t, err) + customExpiresBefore := readTimestamp(t, db, custom.ID, "expires_at") + customUpdatedBefore := readTimestamp(t, db, custom.ID, "updated_at") + + // Seed 1 deleted auto_24h deploy (terminal status — must NOT be touched). + deleted, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamID, + AppID: "app-deleted-" + uuid.NewString()[:8], + Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyAuto24h, + }) + require.NoError(t, err) + _, err = db.ExecContext(ctx, `UPDATE deployments SET status = 'deleted' WHERE id = $1`, deleted.ID) + require.NoError(t, err) + deletedUpdatedBefore := readTimestamp(t, db, deleted.ID, "updated_at") + + // Sleep a tick so any errant updated_at bump is observable. + time.Sleep(20 * time.Millisecond) + + // Act. + result, err := models.PromoteDeploymentTTLsForTeam(ctx, db, teamID) + require.NoError(t, err) + + // Result accounting: 3 deploys promoted + team default flipped (the + // team was seeded by MustCreateTeamDB whose row defaults to auto_24h). + assert.EqualValues(t, 3, result.DeploysPromoted, + "only the 3 auto_24h non-terminal rows must be promoted — got %d", result.DeploysPromoted) + assert.True(t, result.TeamDefaultFlipped, + "team default starts as auto_24h (MustCreateTeamDB default) — promote MUST flip it") + + // All 3 auto rows: now permanent, expires_at NULL, reminders cleared. + for _, id := range autoIDs { + var policy string + var expires sql.NullTime + var reminders int + require.NoError(t, db.QueryRowContext(ctx, + `SELECT ttl_policy, expires_at, reminders_sent FROM deployments WHERE id = $1`, + id, + ).Scan(&policy, &expires, &reminders)) + assert.Equal(t, models.DeployTTLPolicyPermanent, policy, "auto_24h row %s must be permanent", id) + assert.False(t, expires.Valid, "auto_24h row %s must have expires_at = NULL", id) + assert.Equal(t, 0, reminders, "auto_24h row %s must have reminders_sent reset to 0", id) + } + + // permanent row: ttl_policy unchanged, expires_at unchanged, updated_at + // not bumped by THIS call (the WHERE ttl_policy='auto_24h' clause + // excludes it). + assert.Equal(t, permUpdatedBefore, readTimestamp(t, db, perm.ID, "updated_at"), + "permanent row must NOT have updated_at bumped — it was excluded by the promote WHERE clause") + + // custom row: ttl_policy unchanged, expires_at unchanged, updated_at + // not bumped. + var customPolicyAfter string + require.NoError(t, db.QueryRowContext(ctx, + `SELECT ttl_policy FROM deployments WHERE id = $1`, custom.ID, + ).Scan(&customPolicyAfter)) + assert.Equal(t, models.DeployTTLPolicyCustom, customPolicyAfter, + "custom row must NOT have ttl_policy clobbered") + assert.Equal(t, customExpiresBefore, readTimestamp(t, db, custom.ID, "expires_at"), + "custom row's expires_at must NOT be cleared") + assert.Equal(t, customUpdatedBefore, readTimestamp(t, db, custom.ID, "updated_at"), + "custom row must NOT have updated_at bumped") + + // deleted row: terminal status excluded by the promote WHERE clause. + assert.Equal(t, deletedUpdatedBefore, readTimestamp(t, db, deleted.ID, "updated_at"), + "deleted row must NOT be touched — terminal status excluded by WHERE") +} + +// TestPromoteDeploymentTTLsForTeam_PreservesCustomTeamDefault pins the +// "user-explicit choice survives an upgrade" contract: a team whose +// default_deployment_ttl_policy is already 'permanent' must NOT have its +// row UPDATEd (no-op via the WHERE clause), and the result reports +// TeamDefaultFlipped=false. +func TestPromoteDeploymentTTLsForTeam_PreservesCustomTeamDefault(t *testing.T) { + requireDB(t) + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + + ctx := context.Background() + teamID := uuid.MustParse(testhelpers.MustCreateTeamDB(t, db, "hobby")) + defer db.Exec(`DELETE FROM teams WHERE id = $1`, teamID) + + // User has already explicitly opted into permanent defaults. + require.NoError(t, models.UpdateTeamDefaultDeploymentTTLPolicy(ctx, db, teamID, "permanent")) + + // Seed one auto_24h deploy so the deploys-promoted path still runs + // independently of the team-default decision. + d, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamID, + AppID: "app-cust-default-" + uuid.NewString()[:8], + Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyAuto24h, + }) + require.NoError(t, err) + + result, err := models.PromoteDeploymentTTLsForTeam(ctx, db, teamID) + require.NoError(t, err) + + assert.False(t, result.TeamDefaultFlipped, + "a team with default='permanent' must NOT be flipped (WHERE clause excludes it)") + assert.EqualValues(t, 1, result.DeploysPromoted, + "deploy promotion runs independently of the team-default decision") + + // Sanity-confirm the team row really still says permanent. + var policy string + require.NoError(t, db.QueryRowContext(ctx, + `SELECT default_deployment_ttl_policy FROM teams WHERE id = $1`, teamID, + ).Scan(&policy)) + assert.Equal(t, "permanent", policy, + "team default must remain 'permanent' — no clobber on a user-explicit choice") + + // The auto deploy still got promoted. + var deployPolicy string + require.NoError(t, db.QueryRowContext(ctx, + `SELECT ttl_policy FROM deployments WHERE id = $1`, d.ID, + ).Scan(&deployPolicy)) + assert.Equal(t, models.DeployTTLPolicyPermanent, deployPolicy) +} + +// TestPromoteDeploymentTTLsForTeam_NoopOnAlreadyPromotedTeam is the +// idempotency invariant: calling promote a second time on a team whose +// state is already promoted returns a clean noop (0 deploys, default +// unflipped, no error). Critical because the webhook redelivery path can +// re-fire subscription.charged for the same upgrade. +func TestPromoteDeploymentTTLsForTeam_NoopOnAlreadyPromotedTeam(t *testing.T) { + requireDB(t) + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + + ctx := context.Background() + teamID := uuid.MustParse(testhelpers.MustCreateTeamDB(t, db, "hobby")) + defer db.Exec(`DELETE FROM teams WHERE id = $1`, teamID) + + // First call promotes the team default + zero deploys (no auto rows seeded). + first, err := models.PromoteDeploymentTTLsForTeam(ctx, db, teamID) + require.NoError(t, err) + assert.True(t, first.TeamDefaultFlipped) + assert.EqualValues(t, 0, first.DeploysPromoted) + + // Second call is a clean noop — nothing left to promote. + second, err := models.PromoteDeploymentTTLsForTeam(ctx, db, teamID) + require.NoError(t, err) + assert.False(t, second.TeamDefaultFlipped, + "a re-call on a promoted team must NOT re-flip the default") + assert.EqualValues(t, 0, second.DeploysPromoted, + "a re-call on a promoted team must report 0 deploys promoted") +} + +// TestPromoteDeploymentTTLsForTeam_OnlyTouchesTargetTeam is the cross-team +// isolation guard. Two teams both with auto_24h state — promoting team A +// must NOT touch team B. +func TestPromoteDeploymentTTLsForTeam_OnlyTouchesTargetTeam(t *testing.T) { + requireDB(t) + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + + ctx := context.Background() + teamA := uuid.MustParse(testhelpers.MustCreateTeamDB(t, db, "hobby")) + teamB := uuid.MustParse(testhelpers.MustCreateTeamDB(t, db, "hobby")) + defer db.Exec(`DELETE FROM teams WHERE id IN ($1, $2)`, teamA, teamB) + + // Seed an auto_24h deploy in each team. + dA, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamA, AppID: "app-A-" + uuid.NewString()[:8], Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyAuto24h, + }) + require.NoError(t, err) + dB, err := models.CreateDeployment(ctx, db, models.CreateDeploymentParams{ + TeamID: teamB, AppID: "app-B-" + uuid.NewString()[:8], Tier: "hobby", + TTLPolicy: models.DeployTTLPolicyAuto24h, + }) + require.NoError(t, err) + + // Promote ONLY team A. + result, err := models.PromoteDeploymentTTLsForTeam(ctx, db, teamA) + require.NoError(t, err) + assert.EqualValues(t, 1, result.DeploysPromoted) + + // Team A's deploy is permanent. + var aPolicy string + require.NoError(t, db.QueryRowContext(ctx, `SELECT ttl_policy FROM deployments WHERE id = $1`, dA.ID).Scan(&aPolicy)) + assert.Equal(t, models.DeployTTLPolicyPermanent, aPolicy) + + // Team B's deploy is STILL auto_24h. + var bPolicy string + require.NoError(t, db.QueryRowContext(ctx, `SELECT ttl_policy FROM deployments WHERE id = $1`, dB.ID).Scan(&bPolicy)) + assert.Equal(t, models.DeployTTLPolicyAuto24h, bPolicy, + "cross-team isolation violation: promote(A) must not touch team B's deploys") + + // Team B's default is STILL auto_24h. + var bDefault string + require.NoError(t, db.QueryRowContext(ctx, `SELECT default_deployment_ttl_policy FROM teams WHERE id = $1`, teamB).Scan(&bDefault)) + assert.Equal(t, "auto_24h", bDefault, + "cross-team isolation violation: promote(A) must not touch team B's default") +} + +// TestPlansRegistryUpgradeTargets_AllInvokePromoteGuard is the rule-18 +// registry-iterating regression test: for every paid tier in plans.Registry, +// plans.Rank(tier) >= plans.Rank("hobby") must hold — which is the guard the +// billing handler uses to decide whether to call PromoteDeploymentTTLsForTeam. +// A future tier added below 'hobby' would silently skip the promote path; this +// test forces a deliberate update of the guard if the rank table is restructured. +func TestPlansRegistryUpgradeTargets_AllInvokePromoteGuard(t *testing.T) { + hobbyRank := plans.Rank("hobby") + require.Equal(t, 2, hobbyRank, "Rank('hobby') must be 2 — see common/plans/rank.go") + + paidTiers := []string{"hobby", "hobby_plus", "pro", "growth", "team"} + for _, tier := range paidTiers { + assert.GreaterOrEqual(t, plans.Rank(tier), hobbyRank, + "paid tier %q must rank >= hobby — billing.go promote guard would silently skip otherwise", tier) + } + + freeTiers := []string{"anonymous", "free"} + for _, tier := range freeTiers { + assert.Less(t, plans.Rank(tier), hobbyRank, + "non-paid tier %q must rank < hobby — promote must NOT fire for it", tier) + } +} + +// readTimestamp reads one timestamp-shaped column from a deployments row. +// A NULL column returns the zero time so callers comparing two reads +// correctly assert "value unchanged across the operation". +func readTimestamp(t *testing.T, db *sql.DB, id uuid.UUID, column string) time.Time { + t.Helper() + var ts sql.NullTime + if err := db.QueryRow(`SELECT `+column+` FROM deployments WHERE id = $1`, id).Scan(&ts); err != nil { + t.Fatalf("readTimestamp %s: %v", column, err) + } + if !ts.Valid { + return time.Time{} + } + return ts.Time +} From 42b7d99fabadf6a9c5ad97ebdfd1b9cd9e84f961 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sun, 31 May 2026 10:13:49 +0530 Subject: [PATCH 2/4] fix(backfill-tier-ttl): silence errcheck on all fmt.Fprint calls --- cmd/backfill-tier-ttl/main.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cmd/backfill-tier-ttl/main.go b/cmd/backfill-tier-ttl/main.go index 9d0a2c4e..9ed030c4 100644 --- a/cmd/backfill-tier-ttl/main.go +++ b/cmd/backfill-tier-ttl/main.go @@ -116,13 +116,13 @@ func run(args []string, stdout, stderr *os.File) int { return backfillExitUsage } if *dbURL == "" { - fmt.Fprintln(stderr, "backfill-tier-ttl: DATABASE_URL is unset and -database-url not supplied") + _, _ = fmt.Fprintln(stderr, "backfill-tier-ttl: DATABASE_URL is unset and -database-url not supplied") return backfillExitUsage } db, err := sql.Open("postgres", *dbURL) if err != nil { - fmt.Fprintf(stderr, "backfill-tier-ttl: open db: %v\n", err) + _, _ = fmt.Fprintf(stderr, "backfill-tier-ttl: open db: %v\n", err) return backfillExitUsage } defer func() { _ = db.Close() }() @@ -130,13 +130,13 @@ func run(args []string, stdout, stderr *os.File) int { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() if err := db.PingContext(ctx); err != nil { - fmt.Fprintf(stderr, "backfill-tier-ttl: ping db: %v\n", err) + _, _ = fmt.Fprintf(stderr, "backfill-tier-ttl: ping db: %v\n", err) return backfillExitUsage } rows, err := db.QueryContext(ctx, candidateTeamSQL) if err != nil { - fmt.Fprintf(stderr, "backfill-tier-ttl: list candidates: %v\n", err) + _, _ = fmt.Fprintf(stderr, "backfill-tier-ttl: list candidates: %v\n", err) return backfillExitUsage } defer func() { _ = rows.Close() }() @@ -145,7 +145,7 @@ func run(args []string, stdout, stderr *os.File) int { for rows.Next() { var c candidate if err := rows.Scan(&c.teamID, &c.tier, &c.teamDefault, &c.autoDeployCount); err != nil { - fmt.Fprintf(stderr, "backfill-tier-ttl: scan: %v\n", err) + _, _ = fmt.Fprintf(stderr, "backfill-tier-ttl: scan: %v\n", err) return backfillExitUsage } // Skip teams already fully promoted — nothing to do, keeps the @@ -156,7 +156,7 @@ func run(args []string, stdout, stderr *os.File) int { candidates = append(candidates, c) } if err := rows.Err(); err != nil { - fmt.Fprintf(stderr, "backfill-tier-ttl: rows: %v\n", err) + _, _ = fmt.Fprintf(stderr, "backfill-tier-ttl: rows: %v\n", err) return backfillExitUsage } @@ -164,13 +164,13 @@ func run(args []string, stdout, stderr *os.File) int { if *apply { mode = "APPLY" } - fmt.Fprintf(stdout, "backfill-tier-ttl: mode=%s candidates=%d\n", mode, len(candidates)) + _, _ = fmt.Fprintf(stdout, "backfill-tier-ttl: mode=%s candidates=%d\n", mode, len(candidates)) for _, c := range candidates { - fmt.Fprintf(stdout, " team=%s tier=%s team_default=%s auto_deploys=%d\n", + _, _ = fmt.Fprintf(stdout, " team=%s tier=%s team_default=%s auto_deploys=%d\n", c.teamID, c.tier, c.teamDefault, c.autoDeployCount) } if !*apply { - fmt.Fprintln(stdout, "backfill-tier-ttl: dry-run complete — re-run with -apply to mutate") + _, _ = fmt.Fprintln(stdout, "backfill-tier-ttl: dry-run complete — re-run with -apply to mutate") return backfillExitOK } @@ -179,14 +179,14 @@ func run(args []string, stdout, stderr *os.File) int { result, promoteErr := models.PromoteDeploymentTTLsForTeam(ctx, db, c.teamID) if promoteErr != nil { errored++ - fmt.Fprintf(stderr, " team=%s ERROR: %v\n", c.teamID, promoteErr) + _, _ = fmt.Fprintf(stderr, " team=%s ERROR: %v\n", c.teamID, promoteErr) continue } ok++ - fmt.Fprintf(stdout, " team=%s OK promoted_deploys=%d team_default_flipped=%t\n", + _, _ = fmt.Fprintf(stdout, " team=%s OK promoted_deploys=%d team_default_flipped=%t\n", c.teamID, result.DeploysPromoted, result.TeamDefaultFlipped) } - fmt.Fprintf(stdout, "backfill-tier-ttl: applied — ok=%d errored=%d\n", ok, errored) + _, _ = fmt.Fprintf(stdout, "backfill-tier-ttl: applied — ok=%d errored=%d\n", ok, errored) if errored > 0 { // The function is idempotent — operator re-runs for the residual. return backfillExitPartial From e61babf3e5354507b16d247a83cfaad5ab7d81a2 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sun, 31 May 2026 10:41:50 +0530 Subject: [PATCH 3/4] test(api#212): close 100% patch-coverage gap on tier-upgrade TTL promote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the missing branch coverage for the Pro-upgrade auto-promote TTL fix landed in 82de682, taking the three changed surfaces to 100%: - cmd/backfill-tier-ttl: refactored main() → exitFn-wrapped run() with injectable openDB + promoteFn seams (mirrors cmd/openapi-snapshot). Ten tests cover usage errors, DB open/ping/query/scan/rows failures, dry-run vs apply modes, mixed ok/error per-team tallying, env-var fallback, and the main() exit-code dispatch. - models.PromoteDeploymentTTLsForTeam: six sqlmock-driven tests for the begin/exec/rows-affected/commit error wrappers that a real Postgres test DB can't drive on demand. - handlers.handleSubscriptionCharged + emitTTLPoliciesPromotedAudit: added the promoteDeploymentTTLsForTeamFn seam (same pattern as billingPortalFactory) so the fail-open promote-error branch is reachable. Two tests: nil-db audit early-return + webhook still 200s on a simulated promote tx failure. No production behaviour change: the cmd refactor keeps the same exit codes and the handler seam is a package-level var pointing at the real models call, swapped only by tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/backfill-tier-ttl/main.go | 27 +- cmd/backfill-tier-ttl/main_test.go | 352 ++++++++++++++++++ internal/handlers/billing.go | 12 +- .../billing_ttl_promote_error_test.go | 261 +++++++++++++ internal/models/coverage_promote_ttl_test.go | 113 ++++++ 5 files changed, 758 insertions(+), 7 deletions(-) create mode 100644 cmd/backfill-tier-ttl/main_test.go create mode 100644 internal/handlers/billing_ttl_promote_error_test.go create mode 100644 internal/models/coverage_promote_ttl_test.go diff --git a/cmd/backfill-tier-ttl/main.go b/cmd/backfill-tier-ttl/main.go index 9ed030c4..f84da868 100644 --- a/cmd/backfill-tier-ttl/main.go +++ b/cmd/backfill-tier-ttl/main.go @@ -57,6 +57,7 @@ import ( "errors" "flag" "fmt" + "io" "os" "time" @@ -101,12 +102,26 @@ const candidateTeamSQL = ` ORDER BY t.created_at ASC ` -func main() { os.Exit(run(os.Args[1:], os.Stdout, os.Stderr)) } +// exitFn is os.Exit at runtime; tests swap it so the main() body becomes +// a measurable statement instead of an irreducible coverage hole. Mirrors +// the pattern in cmd/openapi-snapshot/main.go. +var exitFn = os.Exit + +// openDB is the *sql.DB factory; tests swap it for a sqlmock-backed handle +// so the run() body is exercisable without a real postgres listener. Default +// uses lib/pq. +var openDB = func(dsn string) (*sql.DB, error) { return sql.Open("postgres", dsn) } + +// promoteFn is the model call the apply-loop drives. Tests swap it so the +// success/error reporting branches at the bottom of run() are reachable +// without a populated platform DB. +var promoteFn = models.PromoteDeploymentTTLsForTeam + +func main() { exitFn(run(os.Args[1:], os.Stdout, os.Stderr)) } // run is the testable body of main — splits CLI parsing from os.Exit so the -// command's exit-code surface can be pinned by a unit test if the harness -// is ever extended. -func run(args []string, stdout, stderr *os.File) int { +// command's exit-code surface can be pinned by a unit test. +func run(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("backfill-tier-ttl", flag.ContinueOnError) fs.SetOutput(stderr) apply := fs.Bool("apply", false, "actually mutate the DB (default: dry-run, no mutations)") @@ -120,7 +135,7 @@ func run(args []string, stdout, stderr *os.File) int { return backfillExitUsage } - db, err := sql.Open("postgres", *dbURL) + db, err := openDB(*dbURL) if err != nil { _, _ = fmt.Fprintf(stderr, "backfill-tier-ttl: open db: %v\n", err) return backfillExitUsage @@ -176,7 +191,7 @@ func run(args []string, stdout, stderr *os.File) int { var ok, errored int for _, c := range candidates { - result, promoteErr := models.PromoteDeploymentTTLsForTeam(ctx, db, c.teamID) + result, promoteErr := promoteFn(ctx, db, c.teamID) if promoteErr != nil { errored++ _, _ = fmt.Fprintf(stderr, " team=%s ERROR: %v\n", c.teamID, promoteErr) diff --git a/cmd/backfill-tier-ttl/main_test.go b/cmd/backfill-tier-ttl/main_test.go new file mode 100644 index 00000000..01799fa2 --- /dev/null +++ b/cmd/backfill-tier-ttl/main_test.go @@ -0,0 +1,352 @@ +package main + +// main_test.go — coverage for the backfill-tier-ttl operator tool. Mirrors +// the cmd/openapi-snapshot pattern: main() forwards os.Exit through a +// swappable exitFn, and the real run() body is driven directly with an +// injected sqlmock-backed *sql.DB plus an injected promoteFn so every +// exit-code branch is reachable without standing up a real platform DB. +// +// The tool itself is one-off operator surgery; the test discipline here +// matches the 100% patch-coverage rule from CLAUDE.md (rule 25's sibling +// in feedback_coverage_95_floor_100_patch.md). Each branch in run() — +// usage errors, db-open error, ping error, query/scan errors, dry-run +// summary, apply with mixed ok/error per-team — has a named test. + +import ( + "bytes" + "context" + "database/sql" + "errors" + "os" + "strings" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/google/uuid" + "github.com/stretchr/testify/require" + + "instant.dev/internal/models" +) + +// withPromoteFn swaps the model call so the apply-loop's success and error +// branches are driveable from the unit-test layer. +func withPromoteFn(t *testing.T, fn func(context.Context, *sql.DB, uuid.UUID) (models.PromoteDeploymentTTLsResult, error)) { + t.Helper() + orig := promoteFn + promoteFn = fn + t.Cleanup(func() { promoteFn = orig }) +} + +func TestRun_MissingDBURL_ReturnsUsage(t *testing.T) { + // Clear DATABASE_URL so the flag default is the empty string. + t.Setenv("DATABASE_URL", "") + var stdout, stderr bytes.Buffer + code := run(nil, &stdout, &stderr) + if code != backfillExitUsage { + t.Fatalf("expected exit %d for missing DATABASE_URL, got %d", backfillExitUsage, code) + } + if !strings.Contains(stderr.String(), "DATABASE_URL is unset") { + t.Errorf("expected stderr to mention 'DATABASE_URL is unset', got: %q", stderr.String()) + } +} + +func TestRun_BadFlag_ReturnsUsage(t *testing.T) { + var stdout, stderr bytes.Buffer + code := run([]string{"--no-such-flag"}, &stdout, &stderr) + if code != backfillExitUsage { + t.Errorf("expected exit %d for unknown flag, got %d", backfillExitUsage, code) + } +} + +func TestRun_OpenDBError_ReturnsUsage(t *testing.T) { + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return nil, errors.New("simulated open failure") } + t.Cleanup(func() { openDB = origOpen }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored"}, &stdout, &stderr) + if code != backfillExitUsage { + t.Fatalf("expected exit %d on open failure, got %d", backfillExitUsage, code) + } + if !strings.Contains(stderr.String(), "open db") { + t.Errorf("expected stderr to wrap 'open db' error, got: %q", stderr.String()) + } +} + +func TestRun_PingError_ReturnsUsage(t *testing.T) { + // sqlmock's ExpectPing only fires when MonitorPingsOption is set — + // otherwise db.PingContext returns nil without consulting the mock. + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing().WillReturnError(errors.New("simulated ping failure")) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored"}, &stdout, &stderr) + if code != backfillExitUsage { + t.Fatalf("expected exit %d on ping failure, got %d (stderr: %s)", backfillExitUsage, code, stderr.String()) + } + if !strings.Contains(stderr.String(), "ping db") { + t.Errorf("expected stderr to mention 'ping db', got: %q", stderr.String()) + } +} + +func TestRun_QueryError_ReturnsUsage(t *testing.T) { + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + mock.ExpectQuery(`FROM teams t`).WillReturnError(errors.New("simulated list failure")) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored"}, &stdout, &stderr) + if code != backfillExitUsage { + t.Fatalf("expected exit %d on query failure, got %d (stderr: %s)", backfillExitUsage, code, stderr.String()) + } + if !strings.Contains(stderr.String(), "list candidates") { + t.Errorf("expected stderr to mention 'list candidates', got: %q", stderr.String()) + } +} + +func TestRun_ScanError_ReturnsUsage(t *testing.T) { + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + // One row that's malformed for Scan(uuid,string,string,int): drop the int. + mock.ExpectQuery(`FROM teams t`). + WillReturnRows(sqlmock.NewRows([]string{"id", "plan_tier", "team_default"}). + AddRow(uuid.New(), "hobby", "auto_24h")) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored"}, &stdout, &stderr) + if code != backfillExitUsage { + t.Fatalf("expected exit %d on scan failure, got %d (stderr: %s)", backfillExitUsage, code, stderr.String()) + } + if !strings.Contains(stderr.String(), "backfill-tier-ttl: scan") { + t.Errorf("expected stderr to mention 'scan', got: %q", stderr.String()) + } +} + +func TestRun_RowsErr_ReturnsUsage(t *testing.T) { + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + cols := []string{"id", "plan_tier", "team_default", "auto_deploy_count"} + mock.ExpectQuery(`FROM teams t`). + WillReturnRows(sqlmock.NewRows(cols). + AddRow(uuid.New(), "hobby", "auto_24h", 1). + RowError(0, errors.New("simulated rows.Err"))) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored"}, &stdout, &stderr) + if code != backfillExitUsage { + t.Fatalf("expected exit %d on rows.Err, got %d (stderr: %s)", backfillExitUsage, code, stderr.String()) + } + if !strings.Contains(stderr.String(), "backfill-tier-ttl: rows") { + t.Errorf("expected stderr to mention 'rows', got: %q", stderr.String()) + } +} + +func TestRun_DryRunMode_PrintsSummaryAndSkipsApply(t *testing.T) { + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + cols := []string{"id", "plan_tier", "team_default", "auto_deploy_count"} + candidateA := uuid.New() + candidateB := uuid.New() + mock.ExpectQuery(`FROM teams t`). + WillReturnRows(sqlmock.NewRows(cols). + // candidate A: team default still auto_24h, no auto deploys + AddRow(candidateA, "hobby", "auto_24h", 0). + // candidate B: team default permanent, 2 auto deploys (still a candidate) + AddRow(candidateB, "pro", "permanent", 2). + // skipped row: team default permanent AND 0 auto deploys → excluded + AddRow(uuid.New(), "team", "permanent", 0)) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + // Sentinel: dry-run must NOT call promoteFn. + called := false + withPromoteFn(t, func(context.Context, *sql.DB, uuid.UUID) (models.PromoteDeploymentTTLsResult, error) { + called = true + return models.PromoteDeploymentTTLsResult{}, nil + }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored"}, &stdout, &stderr) + if code != backfillExitOK { + t.Fatalf("expected exit %d on dry-run, got %d (stderr: %s)", backfillExitOK, code, stderr.String()) + } + if called { + t.Errorf("dry-run mode MUST NOT call promoteFn — it would mutate the DB") + } + out := stdout.String() + if !strings.Contains(out, "mode=DRY-RUN") { + t.Errorf("expected dry-run banner in stdout, got: %q", out) + } + if !strings.Contains(out, "candidates=2") { + t.Errorf("expected 2 candidates (third skipped — already promoted), got: %q", out) + } + if !strings.Contains(out, "dry-run complete") { + t.Errorf("expected dry-run completion message, got: %q", out) + } +} + +func TestRun_ApplyMode_MixedOkAndError_ReturnsPartial(t *testing.T) { + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + cols := []string{"id", "plan_tier", "team_default", "auto_deploy_count"} + candidateOK := uuid.New() + candidateErr := uuid.New() + mock.ExpectQuery(`FROM teams t`). + WillReturnRows(sqlmock.NewRows(cols). + AddRow(candidateOK, "hobby", "auto_24h", 3). + AddRow(candidateErr, "pro", "auto_24h", 1)) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + withPromoteFn(t, func(_ context.Context, _ *sql.DB, id uuid.UUID) (models.PromoteDeploymentTTLsResult, error) { + if id == candidateErr { + return models.PromoteDeploymentTTLsResult{}, errors.New("simulated tx failure") + } + return models.PromoteDeploymentTTLsResult{DeploysPromoted: 3, TeamDefaultFlipped: true}, nil + }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored", "-apply"}, &stdout, &stderr) + if code != backfillExitPartial { + t.Fatalf("expected exit %d when at least one team errored, got %d (stderr: %s)", backfillExitPartial, code, stderr.String()) + } + if !strings.Contains(stdout.String(), "mode=APPLY") { + t.Errorf("expected APPLY banner in stdout, got: %q", stdout.String()) + } + if !strings.Contains(stdout.String(), "ok=1 errored=1") { + t.Errorf("expected 'ok=1 errored=1' tally in stdout, got: %q", stdout.String()) + } + if !strings.Contains(stderr.String(), "simulated tx failure") { + t.Errorf("expected per-team error stderr, got: %q", stderr.String()) + } +} + +func TestRun_ApplyMode_AllOK_ReturnsOK(t *testing.T) { + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + cols := []string{"id", "plan_tier", "team_default", "auto_deploy_count"} + mock.ExpectQuery(`FROM teams t`). + WillReturnRows(sqlmock.NewRows(cols).AddRow(uuid.New(), "hobby", "auto_24h", 1)) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + withPromoteFn(t, func(context.Context, *sql.DB, uuid.UUID) (models.PromoteDeploymentTTLsResult, error) { + return models.PromoteDeploymentTTLsResult{DeploysPromoted: 1, TeamDefaultFlipped: true}, nil + }) + + var stdout, stderr bytes.Buffer + code := run([]string{"-database-url", "postgres://ignored", "-apply"}, &stdout, &stderr) + if code != backfillExitOK { + t.Fatalf("expected exit %d on clean apply, got %d (stderr: %s)", backfillExitOK, code, stderr.String()) + } + if !strings.Contains(stdout.String(), "ok=1 errored=0") { + t.Errorf("expected 'ok=1 errored=0' in stdout, got: %q", stdout.String()) + } +} + +// TestMain_DispatchesViaExitFn covers the main() entry-point line by swapping +// exitFn for a capture closure. os.Args[1:] when invoked under `go test` +// includes test-framework flags, which flag.Parse rejects → run returns the +// usage code. We assert exitFn was invoked (proving main forwards run's exit +// code), not the specific value. +func TestMain_DispatchesViaExitFn(t *testing.T) { + captured := -1 + orig := exitFn + exitFn = func(code int) { captured = code } + t.Cleanup(func() { exitFn = orig }) + main() + if captured < 0 { + t.Errorf("expected exitFn to be invoked, got captured=%d", captured) + } +} + +// TestRun_DBURLFromEnv exercises the os.Getenv fallback for the +// -database-url flag default. We point openDB at a sqlmock handle and +// assert we made it past the empty-URL guard. +func TestRun_DBURLFromEnv(t *testing.T) { + t.Setenv("DATABASE_URL", "postgres://from-env-only") + + db, mock, err := sqlmock.New( + sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp), + sqlmock.MonitorPingsOption(true), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = db.Close() }) + mock.ExpectPing() + mock.ExpectQuery(`FROM teams t`).WillReturnRows(sqlmock.NewRows( + []string{"id", "plan_tier", "team_default", "auto_deploy_count"})) + + origOpen := openDB + openDB = func(_ string) (*sql.DB, error) { return db, nil } + t.Cleanup(func() { openDB = origOpen }) + + var stdout, stderr bytes.Buffer + code := run(nil, &stdout, &stderr) + if code != backfillExitOK { + t.Fatalf("expected exit %d on empty dry-run, got %d (stderr: %s)", backfillExitOK, code, stderr.String()) + } + if !strings.Contains(stdout.String(), "candidates=0") { + t.Errorf("expected 0 candidates banner, got: %q", stdout.String()) + } +} + +// Compile-time guard: the os package import in main.go must still be live +// even if a future refactor stops using os.Stdout/os.Stderr directly. +var _ = os.Stdout diff --git a/internal/handlers/billing.go b/internal/handlers/billing.go index 68caca5d..346c4445 100644 --- a/internal/handlers/billing.go +++ b/internal/handlers/billing.go @@ -336,6 +336,16 @@ var billingPortalFactory = func(db *sql.DB, h *BillingHandler) BillingPortal { return &razorpaybilling.Portal{DB: db, Cfg: h.cfg} } +// promoteDeploymentTTLsForTeamFn wraps the models call the +// subscription.charged webhook makes after an upgrade tx commits. It exists +// as a package-level swap-point ONLY so the error-branch coverage test in +// billing_ttl_promote_error_test.go can drive the slog.Error + +// metrics.TierUpgradeTTLPromote("error") path without sabotaging the real +// postgres test DB (which would also break the prior UpgradeTeamAllTiers +// step). Production callers never reassign this — a single-goroutine test, +// before the handler is exercised, swaps it, runs one request, restores it. +var promoteDeploymentTTLsForTeamFn = models.PromoteDeploymentTTLsForTeam + // billingPortal returns the BillingPortal for this handler via the factory. func (h *BillingHandler) billingPortal() BillingPortal { return billingPortalFactory(h.db, h) @@ -1903,7 +1913,7 @@ func (h *BillingHandler) handleSubscriptionCharged(ctx context.Context, c *fiber // into the Loops email forwarder) — the subscription.upgraded email // already covers customer comms. if plans.Rank(tier) >= plans.Rank("hobby") { - promoteResult, promoteErr := models.PromoteDeploymentTTLsForTeam(ctx, h.db, teamID) + promoteResult, promoteErr := promoteDeploymentTTLsForTeamFn(ctx, h.db, teamID) switch { case promoteErr != nil: metrics.TierUpgradeTTLPromote.WithLabelValues("error").Inc() diff --git a/internal/handlers/billing_ttl_promote_error_test.go b/internal/handlers/billing_ttl_promote_error_test.go new file mode 100644 index 00000000..7964a42e --- /dev/null +++ b/internal/handlers/billing_ttl_promote_error_test.go @@ -0,0 +1,261 @@ +package handlers + +// billing_ttl_promote_error_test.go — coverage for the two ttl-promote +// branches in billing.go that the existing integration-style tests in +// billing_ttl_promote_test.go cannot reach against a real Postgres: +// +// - the promoteErr != nil arm of the switch (lines 1908-1916), where +// PromoteDeploymentTTLsForTeam errored AFTER the upgrade tx already +// committed (fail-open: the webhook MUST still return 200 and emit +// a slog.Error + the "error" outcome metric). +// - the if db == nil { return } early-out of emitTTLPoliciesPromotedAudit +// (lines 3550-3551), the defensive guard for misconfigured handlers. +// +// White-box (package handlers) so we can reach the promoteDeploymentTTLsForTeamFn +// seam and call emitTTLPoliciesPromotedAudit directly. Mirrors the +// billingPortalFactory seam pattern: prod default is the real models call, +// tests swap a fake before exercising the handler and restore on cleanup. +// +// Why a seam (and not a real-DB error injection)? The promote call runs +// AFTER UpgradeTeamAllTiersWithSubscription's tx has committed. To force +// promote to error from the postgres side without sabotaging the prior tx +// you'd need mid-request DDL — impossible to do from a black-box test, and +// fragile. The seam keeps the production path identical (a no-op var +// pointing at the real models call) and gives the test a deterministic +// failure injection point that maps 1-1 to the slog.Error branch. + +import ( + "bytes" + "context" + "crypto/hmac" + "crypto/sha256" + "database/sql" + "encoding/hex" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + _ "github.com/lib/pq" + + "instant.dev/internal/config" + "instant.dev/internal/email" + "instant.dev/internal/middleware" + "instant.dev/internal/models" +) + +// localTestJWTSecret mirrors localTestJWTSecret. Inlined because +// internal/testhelpers imports internal/handlers and an import cycle is +// not allowed in white-box test files. +const localTestJWTSecret = "test-secret-that-is-at-least-32-bytes-long!!" + +// openLocalTestDB connects to TEST_DATABASE_URL using the same driver as +// the integration suite. The caller must Skip when the env var is unset. +func openLocalTestDB(t *testing.T) *sql.DB { + t.Helper() + dsn := os.Getenv("TEST_DATABASE_URL") + db, err := sql.Open("postgres", dsn) + require.NoError(t, err, "openLocalTestDB: sql.Open") + t.Cleanup(func() { _ = db.Close() }) + require.NoError(t, db.Ping(), "openLocalTestDB: ping") + return db +} + +// mustInsertTeam inserts a teams row with the supplied plan_tier and returns +// its UUID as a string. Mirrors testhelpers.MustCreateTeamDB but without the +// cyclic import. +func mustInsertTeam(t *testing.T, db *sql.DB, planTier string) string { + t.Helper() + var id string + err := db.QueryRowContext(context.Background(), ` + INSERT INTO teams (name, plan_tier) VALUES ($1, $2) + RETURNING id::text + `, "promote-err-team-"+uuid.NewString()[:8], planTier).Scan(&id) + require.NoError(t, err, "mustInsertTeam: insert") + return id +} + +// promoteErrTestWebhookSecret is the per-file shared secret used to sign +// Razorpay webhook payloads. Distinct from billing_test.go's package-level +// const so this white-box file doesn't depend on _test package symbols. +const promoteErrTestWebhookSecret = "promote-err-webhook-secret" + +// signPromoteErrPayload returns the hex HMAC-SHA256 of the payload using +// the shared secret. Same shape as signRazorpayPayload in billing_test.go. +func signPromoteErrPayload(t *testing.T, payload []byte) string { + t.Helper() + mac := hmac.New(sha256.New, []byte(promoteErrTestWebhookSecret)) + mac.Write(payload) + return hex.EncodeToString(mac.Sum(nil)) +} + +// newPromoteErrSignedRequest builds a signed /razorpay/webhook request. +func newPromoteErrSignedRequest(t *testing.T, payload []byte) *http.Request { + t.Helper() + req := httptest.NewRequest(http.MethodPost, "/razorpay/webhook", bytes.NewReader(payload)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Razorpay-Signature", signPromoteErrPayload(t, payload)) + return req +} + +// newPromoteErrChargedPayload builds a minimal subscription.charged event +// carrying a notes.team_id (so the handler resolves the team without +// hitting the by-subscription-id fallback) and the supplied plan_id (so +// the handler classifies the upgrade as a paid-tier transition that +// reaches the promote-block guard). +func newPromoteErrChargedPayload(t *testing.T, teamID, subID, planID string) []byte { + t.Helper() + subEntity, _ := json.Marshal(map[string]any{ + "id": subID, + "entity": "subscription", + "plan_id": planID, + "status": "active", + "notes": map[string]any{"team_id": teamID}, + }) + event := map[string]any{ + "entity": "event", + "event": "subscription.charged", + "payload": map[string]any{ + "subscription": map[string]any{ + "entity": json.RawMessage(subEntity), + }, + }, + } + out, err := json.Marshal(event) + require.NoError(t, err) + return out +} + +// newPromoteErrApp builds the Fiber app + config used by the promote-error +// test. Matches billingWebhookDBApp in billing_test.go but uses the local +// secret + lives in package handlers so we can reach the seam. +func newPromoteErrApp(t *testing.T, db *sql.DB) (*fiber.App, *config.Config) { + t.Helper() + cfg := &config.Config{ + JWTSecret: localTestJWTSecret, + RazorpayWebhookSecret: promoteErrTestWebhookSecret, + RazorpayPlanIDHobby: "plan_test_hobby_promote_err", + RazorpayPlanIDPro: "plan_test_pro_promote_err", + RazorpayPlanIDTeam: "plan_test_team_promote_err", + } + bh := NewBillingHandler(db, cfg, email.NewNoop()) + + app := fiber.New(fiber.Config{ + ErrorHandler: func(c *fiber.Ctx, err error) error { + if errors.Is(err, ErrResponseWritten) { + return nil + } + code := fiber.StatusInternalServerError + if e, ok := err.(*fiber.Error); ok { + code = e.Code + } + return c.Status(code).JSON(fiber.Map{"ok": false, "error": "internal_error"}) + }, + }) + app.Use(middleware.RequestID()) + app.Post("/razorpay/webhook", bh.RazorpayWebhook) + return app, cfg +} + +// withPromoteFn swaps promoteDeploymentTTLsForTeamFn to the supplied stub +// for the duration of the test. Cleanup restores the production default. +func withPromoteFn(t *testing.T, fn func(context.Context, *sql.DB, uuid.UUID) (models.PromoteDeploymentTTLsResult, error)) { + t.Helper() + orig := promoteDeploymentTTLsForTeamFn + promoteDeploymentTTLsForTeamFn = fn + t.Cleanup(func() { promoteDeploymentTTLsForTeamFn = orig }) +} + +// TestEmitTTLPoliciesPromotedAudit_NilDB_EarlyReturns pins the defensive +// guard that the audit helper bails when handed a nil *sql.DB. The fail- +// open contract documents "missed audit must NOT fail the webhook"; nil-db +// is the worst input the helper could see, so the guard MUST not panic +// and MUST not attempt the InsertAuditEvent call. +func TestEmitTTLPoliciesPromotedAudit_NilDB_EarlyReturns(t *testing.T) { + // If the nil guard regresses, this call panics on dereference inside + // models.InsertAuditEvent. The test passes by not panicking + by + // returning from the helper at all (no observable side effect to assert + // beyond the absence of a panic). + require.NotPanics(t, func() { + emitTTLPoliciesPromotedAudit( + context.Background(), + nil, + uuid.New(), + models.PromoteDeploymentTTLsResult{DeploysPromoted: 5, TeamDefaultFlipped: true}, + "tier_upgrade", + ) + }) +} + +// TestBillingWebhook_ChargedPromoteError_LogsAndReturnsOK pins the +// fail-open contract for the promote-error branch (lines 1908-1916). When +// PromoteDeploymentTTLsForTeam returns an error AFTER the upgrade tx +// already committed, the handler MUST: +// +// - return 200 (Razorpay redelivery cannot help — the tier flip landed) +// - NOT emit a team.ttl_policies_promoted audit row (the promote failed +// so there's nothing to record; the existing subscription.upgraded +// audit covers the customer-visible change) +// - leave the underlying tier upgrade observable (plan_tier flipped in +// the teams row by the prior UpgradeTeamAllTiersWithSubscription tx) +// +// Drives the real /razorpay/webhook endpoint against the real test DB but +// with promoteDeploymentTTLsForTeamFn swapped for a stub that returns an +// error. Skips when TEST_DATABASE_URL is unset. +func TestBillingWebhook_ChargedPromoteError_LogsAndReturnsOK(t *testing.T) { + if os.Getenv("TEST_DATABASE_URL") == "" { + t.Skip("TEST_DATABASE_URL not set; skipping DB-backed promote-error test") + } + + db := openLocalTestDB(t) + + app, cfg := newPromoteErrApp(t, db) + + teamID := mustInsertTeam(t, db, "free") + defer db.Exec(`DELETE FROM teams WHERE id = $1::uuid`, teamID) + + // Swap the seam: every call returns the same simulated tx failure. + // promoteCalls counts to confirm the handler actually reached the seam + // rather than skipping the block for an unrelated reason (e.g. plan + // classification missing tier metadata, or the rank guard tripping). + var promoteCalls int + withPromoteFn(t, func(context.Context, *sql.DB, uuid.UUID) (models.PromoteDeploymentTTLsResult, error) { + promoteCalls++ + return models.PromoteDeploymentTTLsResult{}, errors.New("simulated promote failure (tx rolled back)") + }) + + subID := "sub_promote_err_" + uuid.NewString() + payload := newPromoteErrChargedPayload(t, teamID, subID, cfg.RazorpayPlanIDPro) + + resp, err := app.Test(newPromoteErrSignedRequest(t, payload), 5000) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode, + "a promote failure MUST NOT 500 the webhook — the tier upgrade tx already committed") + + assert.Equal(t, 1, promoteCalls, + "handler MUST reach the promote seam exactly once on a paid-tier subscription.charged") + + // Tier upgrade still observable — the upgrade tx commits before promote runs. + var newTier string + require.NoError(t, db.QueryRow(`SELECT plan_tier FROM teams WHERE id = $1::uuid`, teamID).Scan(&newTier)) + assert.Equal(t, "pro", newTier, + "prior upgrade tx must remain committed even if the later promote step errors") + + // No team.ttl_policies_promoted audit row — the error arm of the switch + // does NOT call emitTTLPoliciesPromotedAudit (only the success arm does). + var auditCount int + require.NoError(t, db.QueryRow(` + SELECT count(*) FROM audit_log + WHERE team_id = $1::uuid AND kind = $2 + `, teamID, models.AuditKindTeamTTLPoliciesPromoted).Scan(&auditCount)) + assert.Equal(t, 0, auditCount, + "promote-error branch MUST NOT emit a team.ttl_policies_promoted audit row — only the success branch does") +} diff --git a/internal/models/coverage_promote_ttl_test.go b/internal/models/coverage_promote_ttl_test.go new file mode 100644 index 00000000..cc1a92ee --- /dev/null +++ b/internal/models/coverage_promote_ttl_test.go @@ -0,0 +1,113 @@ +package models + +// coverage_promote_ttl_test.go — sqlmock-driven coverage for the six error +// branches in PromoteDeploymentTTLsForTeam that the real-DB integration +// tests in promote_ttl_test.go can't reach (a real Postgres won't reject +// BeginTx/Commit on demand). Each branch maps to a fmt.Errorf wrapper that +// callers grep for in NR alerts — losing one to a silent refactor would +// make on-call's job harder. +// +// White-box (package models) so we can drive a sqlmock-backed *sql.DB +// straight through the function. Mirrors the pattern in +// coverage_deploys_audit_test.go. + +import ( + "context" + "errors" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +// TestPromoteDeploymentTTLsForTeam_BeginTxError covers the +// "models.PromoteDeploymentTTLsForTeam: begin tx" wrapper. sqlmock can +// reject BeginTx with a configured error, which a real Postgres test DB +// won't reproduce on demand. +func TestPromoteDeploymentTTLsForTeam_BeginTxError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectBegin().WillReturnError(errors.New("simulated begin failure")) + + _, err := PromoteDeploymentTTLsForTeam(context.Background(), db, uuid.New()) + require.Error(t, err) + require.ErrorContains(t, err, "begin tx") + require.ErrorContains(t, err, "simulated begin failure") +} + +// TestPromoteDeploymentTTLsForTeam_FlipTeamDefaultError covers the +// "flip_team_default" wrapper — the first UPDATE inside the tx failing. +func TestPromoteDeploymentTTLsForTeam_FlipTeamDefaultError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectBegin() + mock.ExpectExec(`UPDATE teams`).WillReturnError(errors.New("simulated team update failure")) + mock.ExpectRollback() + + _, err := PromoteDeploymentTTLsForTeam(context.Background(), db, uuid.New()) + require.Error(t, err) + require.ErrorContains(t, err, "flip_team_default") + require.ErrorContains(t, err, "simulated team update failure") +} + +// TestPromoteDeploymentTTLsForTeam_TeamRowsAffectedError covers the +// "team_rows_affected" wrapper. sqlmock.NewErrorResult lets us return a +// sql.Result whose RowsAffected() errors — a state real drivers basically +// never hit but the wrapper exists so future driver swaps surface cleanly. +func TestPromoteDeploymentTTLsForTeam_TeamRowsAffectedError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectBegin() + mock.ExpectExec(`UPDATE teams`).WillReturnResult(sqlmock.NewErrorResult(errors.New("simulated team rows.Affected failure"))) + mock.ExpectRollback() + + _, err := PromoteDeploymentTTLsForTeam(context.Background(), db, uuid.New()) + require.Error(t, err) + require.ErrorContains(t, err, "team_rows_affected") + require.ErrorContains(t, err, "simulated team rows.Affected failure") +} + +// TestPromoteDeploymentTTLsForTeam_PromoteDeploysError covers the +// "promote_deploys" wrapper — the second UPDATE inside the tx failing. +func TestPromoteDeploymentTTLsForTeam_PromoteDeploysError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectBegin() + mock.ExpectExec(`UPDATE teams`).WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec(`UPDATE deployments`).WillReturnError(errors.New("simulated deploys update failure")) + mock.ExpectRollback() + + _, err := PromoteDeploymentTTLsForTeam(context.Background(), db, uuid.New()) + require.Error(t, err) + require.ErrorContains(t, err, "promote_deploys") + require.ErrorContains(t, err, "simulated deploys update failure") +} + +// TestPromoteDeploymentTTLsForTeam_DeploysRowsAffectedError covers the +// "deploys_rows_affected" wrapper. +func TestPromoteDeploymentTTLsForTeam_DeploysRowsAffectedError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectBegin() + mock.ExpectExec(`UPDATE teams`).WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec(`UPDATE deployments`).WillReturnResult(sqlmock.NewErrorResult(errors.New("simulated deploys rows.Affected failure"))) + mock.ExpectRollback() + + _, err := PromoteDeploymentTTLsForTeam(context.Background(), db, uuid.New()) + require.Error(t, err) + require.ErrorContains(t, err, "deploys_rows_affected") + require.ErrorContains(t, err, "simulated deploys rows.Affected failure") +} + +// TestPromoteDeploymentTTLsForTeam_CommitError covers the "commit" +// wrapper. The two UPDATEs succeed but the commit itself rejects — also +// driver-rare but the wrapper has to exist for the rollback-on-defer to +// be observed in NR. +func TestPromoteDeploymentTTLsForTeam_CommitError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectBegin() + mock.ExpectExec(`UPDATE teams`).WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec(`UPDATE deployments`).WillReturnResult(sqlmock.NewResult(0, 3)) + mock.ExpectCommit().WillReturnError(errors.New("simulated commit failure")) + + _, err := PromoteDeploymentTTLsForTeam(context.Background(), db, uuid.New()) + require.Error(t, err) + require.ErrorContains(t, err, "commit") + require.ErrorContains(t, err, "simulated commit failure") +} From d3afea5736e9c46f461a3a03c836adec527d6829 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sun, 31 May 2026 10:52:52 +0530 Subject: [PATCH 4/4] test(backfill-tier-ttl): cover openDB default factory (line 113) --- cmd/backfill-tier-ttl/main_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cmd/backfill-tier-ttl/main_test.go b/cmd/backfill-tier-ttl/main_test.go index 01799fa2..2e484704 100644 --- a/cmd/backfill-tier-ttl/main_test.go +++ b/cmd/backfill-tier-ttl/main_test.go @@ -347,6 +347,22 @@ func TestRun_DBURLFromEnv(t *testing.T) { } } +// TestOpenDB_DefaultFactory covers line 113 — the default openDB closure +// that wraps sql.Open("postgres", dsn). sql.Open is lazy (no real connection +// until first use), so we only assert it returns a non-nil *sql.DB without +// touching a real database. Without this test, the default factory body +// never runs because every other test swaps openDB to a sqlmock stub. +func TestOpenDB_DefaultFactory(t *testing.T) { + db, err := openDB("postgres://probe:probe@127.0.0.1:1/probe?sslmode=disable") + if err != nil { + t.Fatalf("sql.Open should not error on a syntactically-valid DSN: %v", err) + } + if db == nil { + t.Fatalf("expected non-nil *sql.DB") + } + _ = db.Close() +} + // Compile-time guard: the os package import in main.go must still be live // even if a future refactor stops using os.Stdout/os.Stderr directly. var _ = os.Stdout