Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,17 @@ test-e2e:
# without it every test can hit the recycle gate.
#
# Requires: kubectl access to the `instant` namespace.
# E2E_ACCOUNT_TOKEN — guard secret for POST/DELETE /internal/e2e/account;
# authed cohort-flow tests (CLI device-flow complete,
# live 401 error_code shape) mint+reap a real
# is_test_cohort account against prod with it. Absent
# → those tests SKIP cleanly.
# Each secret is fetched by the recipe's own shell ("$$(...)") rather than by
# $(shell ...). With $(shell ...) make expands the decoded value into the
# recipe line and echoes it to the terminal / CI log before running it; with
# "$$(...)" only the kubectl command text is echoed. The double quotes also keep
# values containing whitespace or shell metacharacters intact. A failed lookup
# still yields an empty variable, exactly as before.
test-e2e-full:
	E2E_JWT_SECRET="$$(kubectl get secret instant-secrets -n instant -o jsonpath='{.data.JWT_SECRET}' 2>/dev/null | base64 -d)" \
	E2E_RAZORPAY_WEBHOOK_SECRET="$$(kubectl get secret instant-secrets -n instant -o jsonpath='{.data.RAZORPAY_WEBHOOK_SECRET}' 2>/dev/null | base64 -d)" \
	E2E_RAZORPAY_PLAN_ID_PRO="$$(kubectl get secret instant-secrets -n instant -o jsonpath='{.data.RAZORPAY_PLAN_ID_PRO}' 2>/dev/null | base64 -d)" \
	E2E_TEST_TOKEN="$$(kubectl get secret instant-secrets -n instant -o jsonpath='{.data.E2E_TEST_TOKEN}' 2>/dev/null | base64 -d)" \
	E2E_ACCOUNT_TOKEN="$$(kubectl get secret instant-secrets -n instant -o jsonpath='{.data.E2E_ACCOUNT_TOKEN}' 2>/dev/null | base64 -d)" \
	go test ./e2e/... -v -tags e2e -timeout 90s

test-e2e-docker:
Expand Down
189 changes: 189 additions & 0 deletions e2e/agent_steering_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
//go:build e2e

package e2e

// agent_steering_e2e_test.go — live e2e coverage for the 2026-06-10 agent-DX
// fixes against the LIVE api (api.instanode.dev by default):
//
// D1/D8 — a 401 agent_action steers a headless agent at the CLI device-flow +
// INSTANT_TOKEN, NOT the browser /login.
// D6 — a live 401 body carries error_code.
// D7 — an unknown provision field is echoed back under ignored_fields.
// F1 — a recycle-gate 402 returns a claim_url carrying ?t=<jwt>.
// D2 — `instant login` works end-to-end: mint cohort session → POST
// /auth/cli → POST /auth/cli/{id}/complete → poll returns api_token.

import (
"encoding/json"
"net/http"
"strings"
"testing"
)

// TestE2E_AgentSteering_Unauthorized401_SteersAtDeviceFlow covers D1/D6/D8.
// It calls a RequireAuth-gated route without credentials and inspects the 401
// body: the legacy error string must be unchanged, error_code must be
// populated, and agent_action must name INSTANT_TOKEN and the /auth/cli
// device-flow rather than INSTANODE_TOKEN or the browser /login page.
func TestE2E_AgentSteering_Unauthorized401_SteersAtDeviceFlow(t *testing.T) {
	// No Bearer header is sent, so the RequireAuth gate should answer 401.
	resp := get(t, "/api/v1/resources")
	if got := resp.StatusCode; got != http.StatusUnauthorized {
		t.Fatalf("GET /api/v1/resources without auth: want 401, got %d\n%s",
			got, readBody(t, resp))
	}

	var payload map[string]any
	decodeJSON(t, resp, &payload)

	if payload["error"] != "unauthorized" {
		t.Errorf("error must stay 'unauthorized' for back-compat; got %v", payload["error"])
	}

	// D6: the body has to carry a machine-readable error_code.
	if code, _ := payload["error_code"].(string); code == "" {
		t.Errorf("D6: live 401 body must carry a non-empty error_code; got %v", payload["error_code"])
	}

	// D1/D8: every remaining check is a substring probe of agent_action, so a
	// missing value is fatal rather than a cascade of follow-on failures.
	steer, _ := payload["agent_action"].(string)
	if steer == "" {
		t.Fatalf("agent_action must be present on a 401")
	}

	probes := []struct {
		needle   string // substring looked for in agent_action
		expected bool   // whether the substring must be present
		failure  string // message reported when presence != expected
	}{
		{"INSTANT_TOKEN", true, "D8: agent_action must name INSTANT_TOKEN; got %q"},
		{"INSTANODE_TOKEN", false, "D8: agent_action must NOT name the old INSTANODE_TOKEN; got %q"},
		{"/auth/cli", true, "D1: agent_action must steer at the CLI device-flow (/auth/cli); got %q"},
		{"/login", false, "D1: agent_action must NOT push a headless agent at /login; got %q"},
	}
	for _, p := range probes {
		if strings.Contains(steer, p.needle) != p.expected {
			t.Errorf(p.failure, steer)
		}
	}
}

// TestE2E_AgentSteering_UnknownProvisionField_EchoedAsIgnored covers D7. A
// provision body carrying an unrecognized key ("region") must still succeed
// with 201 and list that key under ignored_fields.
//
// The anonymous /cache/new path is used (Redis — live in prod, unlike /db/new
// which is Phase-2-gated), together with a unique X-Forwarded-For fingerprint
// so the request does not collide with the recycle gate. A 503 means the
// service is not enabled on this deploy and the test is skipped.
func TestE2E_AgentSteering_UnknownProvisionField_EchoedAsIgnored(t *testing.T) {
	ip := uniqueIP(t)
	// Explicit name (so the test owns it) + an unknown "region" key.
	resp := post(t, "/cache/new",
		map[string]any{"name": "ignored-fields-probe", "region": "mars"},
		"X-Forwarded-For", ip)
	if resp.StatusCode == http.StatusServiceUnavailable {
		// Nothing else reads the body on this path, so release it before
		// skipping instead of leaking the connection.
		resp.Body.Close()
		t.Skip("POST /cache/new: service not enabled (503) — skipping D7 live check")
	}
	if resp.StatusCode != http.StatusCreated {
		t.Fatalf("POST /cache/new with unknown field: want 201, got %d\n%s",
			resp.StatusCode, readBody(t, resp))
	}
	var body map[string]any
	decodeJSON(t, resp, &body)

	raw, ok := body["ignored_fields"]
	if !ok {
		t.Fatalf("D7: 201 response must echo ignored_fields for an unknown key; body=%v", body)
	}
	// A JSON array decoded into `any` arrives as []any.
	arr, ok := raw.([]any)
	if !ok {
		t.Fatalf("ignored_fields must be an array; got %T (%v)", raw, raw)
	}
	found := false
	for _, v := range arr {
		if s, _ := v.(string); s == "region" {
			found = true
			break // one match is enough
		}
	}
	if !found {
		t.Errorf("D7: ignored_fields must contain 'region'; got %v", arr)
	}
}

// TestE2E_AgentSteering_RecycleGate402_ClaimURLHasToken covers F1: when the
// free-tier recycle gate answers 402 free_tier_recycle_requires_claim, its
// claim_url must embed a minted claim JWT (?t=).
//
// Forcing the gate on a live cluster is timing-dependent (a prior provision
// has to age out), and a sleep/aging loop would violate rate-limit discipline.
// The test therefore only asserts the contract when it happens to observe a
// 402 and skips otherwise; it also skips when E2E_TEST_TOKEN is unset.
func TestE2E_AgentSteering_RecycleGate402_ClaimURLHasToken(t *testing.T) {
	if e2eTestToken() == "" {
		t.Skip("E2E_TEST_TOKEN unset — cannot isolate a fingerprint to drive the recycle gate; skipping F1 live check")
	}

	// One anonymous provision on a fresh fingerprint sets the recycle_seen
	// marker but is not expected to gate by itself (there is an active row).
	// The deterministic gate path belongs to the unit test
	// TestRecycleGate_FiresWith402_WhenMarkerExistsAndNoActiveRow; this is an
	// opportunistic check of the live contract.
	resp := post(t, "/cache/new", nil, "X-Forwarded-For", uniqueIP(t))
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusPaymentRequired {
		t.Skipf("recycle gate did not fire on this run (got %d) — F1 contract proven by the unit test; skipping live assert", status)
	}

	var payload map[string]any
	decodeErr := json.NewDecoder(resp.Body).Decode(&payload)
	if decodeErr != nil {
		t.Fatalf("decode 402 body: %v", decodeErr)
	}

	if payload["error"] != "free_tier_recycle_requires_claim" {
		t.Fatalf("unexpected 402 error code: %v", payload["error"])
	}

	link, _ := payload["claim_url"].(string)
	if hasToken := strings.Contains(link, "?t="); !hasToken {
		t.Errorf("F1: recycle-gate claim_url must embed a minted claim JWT (?t=); got %q", link)
	}
}

// TestE2E_CLIDeviceFlow_Complete_FlipsSessionLive covers D2, the `instant
// login` round-trip against the live api:
//
//  1. mint a throwaway cohort account (the test skips if minting is unavailable),
//  2. POST /auth/cli to open a pending CLI session,
//  3. POST /auth/cli/{id}/complete with the cohort's session JWT as Bearer,
//  4. GET /auth/cli/{id} and expect status "complete" plus an ink_-prefixed
//     api_token.
//
// The cohort account is reaped when the test returns.
func TestE2E_CLIDeviceFlow_Complete_FlipsSessionLive(t *testing.T) {
	account, reap := mintCohort(t, "free")
	defer reap()

	// Open a pending CLI session.
	opened := post(t, "/auth/cli", map[string]any{})
	if opened.StatusCode != http.StatusCreated {
		t.Fatalf("POST /auth/cli: want 201, got %d\n%s", opened.StatusCode, readBody(t, opened))
	}
	var session struct {
		SessionID string `json:"session_id"`
	}
	decodeJSON(t, opened, &session)
	if session.SessionID == "" {
		t.Fatalf("POST /auth/cli returned no session_id")
	}
	sessionPath := "/auth/cli/" + session.SessionID

	// Complete the session, authenticating as the minted cohort user.
	completed := post(t, sessionPath+"/complete", nil,
		"Authorization", "Bearer "+account.SessionJWT)
	if completed.StatusCode != http.StatusOK {
		t.Fatalf("POST /auth/cli/{id}/complete: want 200, got %d\n%s",
			completed.StatusCode, readBody(t, completed))
	}
	var ack struct {
		OK bool `json:"ok"`
	}
	decodeJSON(t, completed, &ack)
	if !ack.OK {
		t.Fatalf("complete response must be {ok:true}")
	}

	// Poll: the session must now report complete and hand out a token.
	polled := get(t, sessionPath)
	if polled.StatusCode != http.StatusOK {
		t.Fatalf("GET /auth/cli/{id} after complete: want 200, got %d\n%s",
			polled.StatusCode, readBody(t, polled))
	}
	var state map[string]any
	decodeJSON(t, polled, &state)
	if state["status"] != "complete" {
		t.Errorf("D2: completed poll must carry status='complete'; got %v", state["status"])
	}
	// An empty or non-string api_token also fails the prefix check.
	token, _ := state["api_token"].(string)
	if !strings.HasPrefix(token, "ink_") {
		t.Errorf("D2: completed poll must return a real api_token (ink_...); got %q", token)
	}
}
87 changes: 87 additions & 0 deletions e2e/cohort_helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
//go:build e2e

package e2e

// cohort_helpers_test.go — shared helper for the authed e2e flows added
// 2026-06-10 (D1/D2/D6). Mints a throwaway is_test_cohort account against the
// LIVE api via POST /internal/e2e/account (guarded by the X-E2E-Token header =
// E2E_ACCOUNT_TOKEN) and ALWAYS reaps it via DELETE /internal/e2e/account/:id.
// The mint/reap surface is INERT in prod unless the operator wired the secret,
// so tests that need it skip cleanly when E2E_ACCOUNT_TOKEN is unset.

import (
"bytes"
"encoding/json"
"net/http"
"os"
"testing"
)

// e2eAccountToken reads the E2E_ACCOUNT_TOKEN environment variable, the guard
// secret for the /internal/e2e/account surface. It yields "" when the variable
// is unset or empty, which callers treat as "skip the test".
func e2eAccountToken() string {
	if secret, present := os.LookupEnv("E2E_ACCOUNT_TOKEN"); present {
		return secret
	}
	return ""
}

// cohort holds the JSON fields decoded from a successful
// POST /internal/e2e/account response: one ephemeral test account.
type cohort struct {
	// TeamID identifies the account; the reap DELETE is addressed by it.
	TeamID string `json:"team_id"`
	// UserID is the "user_id" field of the mint response.
	UserID string `json:"user_id"`
	// Email is the "email" field of the mint response.
	Email string `json:"email"`
	// Tier is the "tier" field of the mint response.
	Tier string `json:"tier"`
	// SessionJWT is the session token tests present as a Bearer credential.
	SessionJWT string `json:"session_jwt"`
}

// mintCohort creates a real is_test_cohort account on the live api via
// POST /internal/e2e/account, authenticated with the X-E2E-Token header.
//
// tier is sent as the "tier" field of the request body. The test is SKIPPED
// when E2E_ACCOUNT_TOKEN is unset or when the endpoint answers 404 (surface
// inert / token mismatch); any other non-200 status fails the test.
//
// It returns the decoded account and a reap func that DELETEs it by team id;
// always defer reap. Reap failures are logged rather than failing the test.
// If the mint response carries a team_id but no session_jwt, the account is
// reaped here before the test is failed, so it cannot linger on the live api.
func mintCohort(t *testing.T, tier string) (cohort, func()) {
	t.Helper()
	tok := e2eAccountToken()
	if tok == "" {
		t.Skip("E2E_ACCOUNT_TOKEN unset — cohort-minting e2e flow skipped (surface is inert without it)")
	}

	reqBody, err := json.Marshal(map[string]string{"tier": tier, "env": "production"})
	if err != nil {
		t.Fatalf("mintCohort: marshal request body: %v", err)
	}
	req, err := http.NewRequest(http.MethodPost, baseURL()+"/internal/e2e/account", bytes.NewReader(reqBody))
	if err != nil {
		t.Fatalf("mintCohort: NewRequest: %v", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-E2E-Token", tok)
	resp, err := client.Do(req)
	if err != nil {
		t.Fatalf("mintCohort: POST /internal/e2e/account: %v", err)
	}
	if resp.StatusCode == http.StatusNotFound {
		// Inert-by-default: token wrong or surface not armed on this deploy.
		resp.Body.Close()
		t.Skip("POST /internal/e2e/account returned 404 — surface inert / token mismatch; skipping")
	}
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("mintCohort: want 200, got %d\n%s", resp.StatusCode, readBody(t, resp))
	}
	var c cohort
	decodeJSON(t, resp, &c)

	// reap is built before the response is validated so that a half-usable
	// response (team_id present, session_jwt missing) can still be cleaned up.
	reap := func() {
		dreq, derr := http.NewRequest(http.MethodDelete,
			baseURL()+"/internal/e2e/account/"+c.TeamID, nil)
		if derr != nil {
			t.Logf("mintCohort reap: NewRequest: %v", derr)
			return
		}
		dreq.Header.Set("X-E2E-Token", tok)
		dresp, derr := client.Do(dreq)
		if derr != nil {
			t.Logf("mintCohort reap: DELETE failed (account may linger): %v", derr)
			return
		}
		dresp.Body.Close()
		if dresp.StatusCode != http.StatusOK {
			t.Logf("mintCohort reap: DELETE returned %d for team %s", dresp.StatusCode, c.TeamID)
		}
	}

	if c.TeamID == "" || c.SessionJWT == "" {
		if c.TeamID != "" {
			// The caller never receives reap on this path, so delete the
			// account now while its id is still known.
			reap()
		}
		t.Fatalf("mintCohort: response missing team_id/session_jwt: %+v", c)
	}
	return c, reap
}
8 changes: 6 additions & 2 deletions internal/handlers/agent_action_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,15 @@ func TestRespondError_KnownCode_PopulatesAgentAction(t *testing.T) {
wantActionSubstr: "url path tokens",
},
{
name: "unauthorized points at login",
// D1/D8 (2026-06-10): the `unauthorized` agent_action steers a
// HEADLESS agent at the CLI device-flow / INSTANT_TOKEN PAT, NOT the
// browser /login page. Assert it names the device-flow endpoint and
// the canonical bearer env var.
name: "unauthorized steers headless agents at the device-flow",
code: "unauthorized",
status: fiber.StatusUnauthorized,
wantUpgradeURL: false,
wantActionSubstr: "log in at https://instanode.dev/login",
wantActionSubstr: "/auth/cli",
},
{
name: "auth_required points at login/signup",
Expand Down
5 changes: 5 additions & 0 deletions internal/handlers/billing.go
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,11 @@ func (h *BillingHandler) RazorpayWebhook(c *fiber.Ctx) error {
}
if !sigOK {
slog.Error("billing.webhook.signature_failed")
// S4 (metric half): bump the inbound-signature-failure counter so the
// "N razorpay signature failures / hour" NR alert can fire without
// grepping the slog line above. Mirrors the GitHub webhook bad-signature
// counter; the Prom rule + NR alert JSON are the infra agent's job.
metrics.RazorpayWebhookSigFail.Inc()
// B18 wave-3 hardening (2026-05-21): emit an audit_log row on every
// signature-mismatch attempt so an operator dashboard can chart
// "N auth failures / hour" without grepping NR logs. Best-effort
Expand Down
62 changes: 62 additions & 0 deletions internal/handlers/billing_webhook_sig_fail_metric_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package handlers_test

// billing_webhook_sig_fail_metric_test.go — S4 (metric half, 2026-06-10).
//
// A forged-signature POST /razorpay/webhook must bump
// instant_razorpay_webhook_sig_fail_total (mirroring the GitHub webhook
// bad-signature counter) so an operator can chart "N signature failures / hour"
// without grepping the billing.webhook.signature_failed slog line. Pre-fix the
// only signal was the log line + a best-effort audit row.

import (
"bytes"
"net/http"
"net/http/httptest"
"testing"

"github.com/gofiber/fiber/v2"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"instant.dev/internal/config"
"instant.dev/internal/email"
"instant.dev/internal/handlers"
"instant.dev/internal/metrics"
"instant.dev/internal/middleware"
)

// TestRazorpayWebhook_ForgedSignature_IncrementsSigFailCounter posts a body with
// a deliberately wrong X-Razorpay-Signature and asserts the sig-fail counter
// rises by exactly one and the response is a 4xx (Razorpay's retry contract is
// covered elsewhere; here we pin the metric). db is nil so the best-effort
// audit-row block no-ops — the counter increment is independent of it.
func TestRazorpayWebhook_ForgedSignature_IncrementsSigFailCounter(t *testing.T) {
cfg := &config.Config{
JWTSecret: "test-secret-that-is-at-least-32-bytes-long!!",
RazorpayWebhookSecret: "live_webhook_secret_for_this_test_only_xxxxxx",
}
billing := handlers.NewBillingHandler(nil, cfg, email.NewNoop())
app := fiber.New()
app.Use(middleware.RequestID())
app.Post("/razorpay/webhook", billing.RazorpayWebhook)

before := testutil.ToFloat64(metrics.RazorpayWebhookSigFail)

payload := []byte(`{"event":"subscription.charged","payload":{}}`)
req := httptest.NewRequest(http.MethodPost, "/razorpay/webhook", bytes.NewReader(payload))
req.Header.Set("Content-Type", "application/json")
// A signature that cannot verify against the configured secret.
req.Header.Set("X-Razorpay-Signature", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")

resp, err := app.Test(req, 5000)
require.NoError(t, err)
defer resp.Body.Close()

assert.GreaterOrEqual(t, resp.StatusCode, 400,
"a forged-signature webhook must be rejected with a 4xx")

after := testutil.ToFloat64(metrics.RazorpayWebhookSigFail)
assert.Equal(t, before+1, after,
"instant_razorpay_webhook_sig_fail_total must increment by exactly 1 on a signature failure (S4)")
}
Loading
Loading