Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 212 additions & 0 deletions .github/workflows/auth-contract-compose-pw.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# Layer-2 auth-contract PR gate. Spins up a docker-compose stack with
# postgres + redis + the api binary BUILT FROM THIS PR'S SOURCE, then runs
# the same Playwright contract assertions that the Layer-1 prod-target
# spec runs (instanode-web/e2e/auth-contract.spec.ts + this repo's
# e2e/browser/tests/auth-contract-local.spec.ts). Difference: this fires
# on every PR and reds the PR if the contract regresses — Layer-1 catches
# regressions ~5 minutes POST-deploy, this catches them PRE-merge.
#
# Cost ceiling: ~5 min wall clock per PR (compose build dominates ~3 min).
# No path filter — the auth surface is implicit (a router change, a CORS
# config tweak, a magic-link handler tweak, a config.Load default flip
# could all break it without touching obvious "auth" paths).
#
# What this does NOT cover:
# - email delivery (worker + Brevo; covered by post-deploy auth-probe).
# - dashboard SPA cookie exchange round-trip (covered by Layer-1 prod
# spec — needs a real web origin DNS record).
# - rate-limit / abuse-defence paths (covered by unit tests).
# What this DOES cover that nothing else does:
# - the literal CORS preflight headers from the PR's api binary, against
# a real Chromium fetch — closes the 2026-05-29 / 2026-05-30 outage
# class at PR time.

name: Auth Contract (Layer-2 compose Playwright)

# Triggers: every pull request targeting master, plus manual dispatch.
on:
  pull_request:
    branches: [master]
    # NO paths-ignore. The auth surface is the union of:
    #   internal/router/router.go                   (CORS config)
    #   internal/handlers/auth*.go                  (Exchange / Email)
    #   internal/handlers/magic_link.go
    #   internal/middleware/preflight_allowlist.go
    #   internal/config/config.go                   (Environment default)
    #   internal/db/migrations/*                    (magic_link table shape)
    # Any of these can regress the contract — the only honest filter is
    # "every PR". The 5-min wall-clock budget makes this affordable.
  workflow_dispatch:

# Least-privilege GITHUB_TOKEN. The job in this workflow only checks out
# source, builds/runs the stack on the runner and uploads artifacts, so
# read access to repository contents is all the token needs. Without this
# block the token inherits the repository/organisation default scopes.
permissions:
  contents: read

# One live run per ref: a newer push to the same ref cancels the run that
# is still in progress.
concurrency:
  group: auth-contract-compose-${{ github.ref }}
  cancel-in-progress: true

jobs:
auth-contract:
runs-on: ubuntu-latest
timeout-minutes: 12
steps:
- name: Checkout api
  uses: actions/checkout@v6
  with:
    path: api
    # No later step in this workflow invokes git, so do not leave the
    # token configured on the runner where PR-authored code (npm
    # lifecycle scripts, the Docker build, the Playwright spec) runs.
    persist-credentials: false

# The Dockerfile multi-stage build does `COPY proto/`, `COPY common/`,
# `COPY api/` — so the build context needs all three as siblings.
# Identical pattern to ci.yml / deploy.yml.
- name: Checkout proto sibling (for go.mod replace ../proto)
  uses: actions/checkout@v6
  with:
    # Repository name comes from the PROTO_REPO variable when set,
    # otherwise "<owner>/proto".
    repository: ${{ vars.PROTO_REPO || format('{0}/proto', github.repository_owner) }}
    # Prefer the cross-repo token; fall back to the workflow token.
    token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
    path: proto
    # Same reasoning as the api checkout — this matters most here because
    # the token may be the longer-lived REPO_ACCESS_TOKEN.
    persist-credentials: false

- name: Checkout common sibling (for go.mod replace ../common)
  uses: actions/checkout@v6
  with:
    # Repository name comes from the COMMON_REPO variable when set,
    # otherwise "<owner>/common".
    repository: ${{ vars.COMMON_REPO || format('{0}/common', github.repository_owner) }}
    token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
    path: common
    persist-credentials: false

- name: Set up Node (for Playwright)
  uses: actions/setup-node@v5
  with:
    node-version: "20"
    # npm cache is keyed on the browser-test lockfile inside the api checkout.
    cache: npm
    cache-dependency-path: api/e2e/browser/package-lock.json

- name: Install Playwright + Chromium
  # `npm ci` installs strictly from the lockfile, so lockfile drift cannot
  # creep into CI. `--with-deps` additionally pulls the system libraries
  # Chromium requires on a fresh ubuntu-latest runner.
  working-directory: api/e2e/browser
  run: |
    npm ci
    npx playwright install --with-deps chromium

- name: Build + start docker-compose stack
  # The compose file declares `context: ..`. Compose resolves that path
  # RELATIVE TO THE COMPOSE FILE'S DIRECTORY by default, i.e. the GitHub
  # workspace root that holds proto/, common/ and api/ — exactly the layout
  # the multi-stage Dockerfile expects for its three COPY lines.
  # The env values below are passed through as build args so /healthz
  # commit_id carries the real PR SHA and is comparable to $GITHUB_SHA.
  env:
    GIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
    BUILD_TIME: ${{ github.event.repository.updated_at }}
    VERSION: pr-${{ github.event.pull_request.number || 'manual' }}
  run: |
    set -euo pipefail
    docker compose -f api/docker-compose.ci.yml up -d --build

- name: Wait for api /healthz to return 200
  # 90s ceiling (45 probes, 2s apart) — postgres pull + start + api
  # migration apply + listener bind. If we ever blow past this, the api
  # isn't healthy and the test would fail downstream anyway; failing here
  # gives a cleaner diagnostic.
  #
  # The step succeeds only if a probe inside the loop saw `"ok":true` in
  # the response body. A single `healthy` flag carries that result out of
  # the loop, so the pass/fail decision uses the same predicate as the
  # polling and the body printed at the end is the one that matched.
  run: |
    set -euo pipefail
    healthy=false
    for i in $(seq 1 45); do
      # Save the body first, then grep the file. Piping curl through
      # `tee | grep -q` can let grep exit before tee finishes writing,
      # and under `pipefail` that would report a healthy probe as failed.
      if curl -sf -o /tmp/healthz.json http://localhost:8080/healthz \
          && grep -q '"ok":true' /tmp/healthz.json; then
        echo "api healthy after ${i} attempts ($((i*2))s)"
        healthy=true
        break
      fi
      echo "waiting for api (${i}/45)"
      sleep 2
    done
    if [ "$healthy" != true ]; then
      echo "::error::api never became healthy in 90s"
      # Diagnostics are best-effort: a failing `ps` must not hide the logs.
      docker compose -f api/docker-compose.ci.yml ps || true
      docker compose -f api/docker-compose.ci.yml logs --tail=200 api || true
      exit 1
    fi
    echo "── /healthz ────────────────────────────────"
    cat /tmp/healthz.json
    echo

- name: Run Layer-2 Playwright spec
  working-directory: api/e2e/browser
  env:
    E2E_API_URL: http://localhost:8080
    E2E_WEB_ORIGIN: http://localhost:5173
    CI: "true"
  # The chromium-compose-pna project is selected so that Chromium's Local /
  # Private Network Access checks are switched off (see playwright.config.ts).
  # Under this stack both the document origin and the api sit on loopback,
  # which PNA blocks even though prod's public→public flow never trips it.
  run: >-
    npx playwright test tests/auth-contract-local.spec.ts
    --project=chromium-compose-pna
    --reporter=list

- name: Emit gate-fired signal (rule 25 — observability)
  # Compose runs are a CI-internal signal, not a prod metric (so they
  # don't need an NR alert+dashboard per rule 25's literal text). But
  # we DO want to be able to answer "did the gate fire on the last
  # N PRs?" without scraping job logs. A 1-line newline-delimited
  # JSON artifact does that — downloadable per-run, greppable by
  # date, no infrastructure required.
  if: always()
  # SECURITY: route every GitHub-context interpolation through env:
  # rather than splicing into the shell, even though all three values
  # here are GitHub-controlled enums/integers/hashes (no user-author
  # input). Keeps the surface uniformly safe — same pattern as the
  # ci.yml::dispatch-auth-contract-e2e step.
  env:
    PR_NUMBER: ${{ github.event.pull_request.number || 'manual' }}
    PR_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
    JOB_STATUS: ${{ job.status }}
  run: |
    set -euo pipefail
    # Defensive shape checks — PR_NUMBER is an integer or "manual",
    # SHA is lowercase hex. The values are spliced into a JSON line below
    # without escaping, so the WHOLE string is validated: each pattern
    # rejects the empty string and any value containing a character
    # outside the allowed set (a `[0-9]*` glob would only test the first
    # character and let e.g. `1","x":"y` through).
    case "$PR_NUMBER" in
      manual) ;;
      ''|*[!0-9]*) echo "::error::unexpected PR_NUMBER shape"; exit 1 ;;
    esac
    case "$PR_SHA" in
      ''|*[!0-9a-f]*) echo "::error::unexpected SHA shape"; exit 1 ;;
    esac
    case "$JOB_STATUS" in
      success|failure|cancelled) ;;
      *) echo "::error::unexpected JOB_STATUS"; exit 1 ;;
    esac
    mkdir -p /tmp/gate-signal
    # One NDJSON record per run; `ts` is the UTC time this step executed.
    printf '{"gate":"auth-contract-compose-pw","pr":"%s","sha":"%s","status":"%s","ts":"%s"}\n' \
      "$PR_NUMBER" \
      "$PR_SHA" \
      "$JOB_STATUS" \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      > /tmp/gate-signal/auth-contract-compose.jsonl
    cat /tmp/gate-signal/auth-contract-compose.jsonl

- name: Upload gate-fired signal artifact
  # Published whatever the job outcome; kept for 30 days.
  if: ${{ always() }}
  uses: actions/upload-artifact@v4
  with:
    name: auth-contract-gate-signal
    path: /tmp/gate-signal/auth-contract-compose.jsonl
    retention-days: 30

- name: Upload Playwright report on failure
  # Only published when an earlier step failed; kept for 14 days.
  if: ${{ failure() }}
  uses: actions/upload-artifact@v4
  with:
    name: playwright-report-auth-contract-layer2
    path: api/e2e/browser/playwright-report/
    retention-days: 14

- name: Dump api logs on failure
  if: ${{ failure() }}
  run: |
    # Thin wrapper so every call targets the CI compose file. Each call is
    # best-effort (`|| true`) so one failing command cannot hide the rest.
    dc() { docker compose -f api/docker-compose.ci.yml "$@"; }
    echo "── docker compose ps ───────────────────────"
    dc ps || true
    echo "── api logs (tail 500) ─────────────────────"
    dc logs --tail=500 api || true
    echo "── postgres logs (tail 200) ────────────────"
    dc logs --tail=200 postgres || true

- name: Tear down
  # Runs regardless of outcome; `-v` also removes the stack's volumes.
  # A teardown failure is ignored so it cannot change the job result.
  if: ${{ always() }}
  run: docker compose -f api/docker-compose.ci.yml down -v || true
128 changes: 128 additions & 0 deletions docker-compose.ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# docker-compose.ci.yml — Layer-2 PR-gate harness.
#
# Purpose
# -------
# The Layer-1 auth-contract Playwright spec
# (instanode-web/e2e/auth-contract.spec.ts + this repo's PR-time dispatch in
# ci.yml::dispatch-auth-contract-e2e) drives Chromium against PRODUCTION api.
# It catches the AUTH-004-class regression AFTER an api PR merges + deploys.
#
# This compose stack is the Layer-2 gate: spin up a minimal real-binary api
# built from the PR's branch, run the SAME contract assertions against it
# locally inside the GH Actions runner, fail the PR BEFORE merge if the
# preflight loses ACAO / ACAC or /auth/email/start stops returning 202.
#
# Anti-goals (deliberately not in here)
# -------------------------------------
# - worker + provisioner. The auth surface does not need them. Magic-link
# /auth/email/start writes a row and returns 202 even if the downstream
# email backend is missing — that's deliberate enumeration defence (see
# handlers/magic_link.go::Start) and exactly what makes this stack
# viable without a worker.
# - object storage, NATS, mongo. None on the auth path.
# - shipping this to prod. This file is CI-only. infra/docker-compose.yml
# remains the local-dev stack; this file is a peer not a replacement.
#
# Build context
# -------------
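# Built from the REPO PARENT (the workspace that holds proto/, common/, api/
# as siblings) because the Dockerfile expects all three. In CI the workflow
# checks out proto+common as siblings of api/ and runs, from that workspace,
#   docker compose -f api/docker-compose.ci.yml up -d --build
# Compose resolves the relative `context: ..` below against this file's own
# directory (its default project directory), so the build context resolves
# the COPY proto/, COPY common/, COPY api/ lines exactly as deploy.yml does.
# Do not pass `--project-directory ..`: that re-bases `context: ..` and
# points the build at the wrong directory.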
#
# Resources
# ---------
# postgres:17-alpine + redis:7-alpine. The api auto-runs migrations on boot
# (main.go::runMigrations) so no separate migrator container is needed.
#
# CORS contract
# -------------
# The router (internal/router/router.go ~L237) appends
# http://localhost:5173,3000,5174 to the CORS allowlist when ENVIRONMENT=
# development. The Playwright spec stubs out http://localhost:5173 as the
# document origin so the cross-origin fetch to http://localhost:8080 is
# genuinely cross-origin and exercises the same code path that ships to
# prod.

services:
postgres:
  # Postgres 17 — one major ahead of CI's :16-alpine, yet compatible with
  # every migration under internal/db/migrations/. Tracking a recent major
  # surfaces forward-compat breakage at PR time instead of at infra-bump
  # time.
  image: postgres:17-alpine
  environment:
    POSTGRES_USER: postgres
    POSTGRES_PASSWORD: postgres
    POSTGRES_DB: instant_platform
  healthcheck:
    # `pg_isready` is the standard probe: once the server accepts
    # connections it is ready for the api's RunMigrations call.
    test:
      - CMD-SHELL
      - pg_isready -U postgres -d instant_platform
    interval: 2s
    timeout: 3s
    retries: 30

redis:
  image: redis:7-alpine
  healthcheck:
    # Exec-form probe: runs `redis-cli ping` inside the container.
    test:
      - CMD
      - redis-cli
      - ping
    interval: 2s
    timeout: 3s
    retries: 30

api:
  # Build from the repo PARENT so the multi-stage Dockerfile can COPY proto/
  # + common/ + api/ as siblings. `context: ..` is resolved relative to this
  # file's directory (Compose's default project directory), so CI runs a
  # plain `docker compose -f api/docker-compose.ci.yml up -d --build` from
  # the workspace root — no `--project-directory` override is needed.
  build:
    context: ..
    dockerfile: api/Dockerfile
    args:
      # Deterministic stamp so /healthz commit_id is comparable across runs
      # (we want it to equal $GITHUB_SHA in CI; falls back to "ci-local"
      # for laptop runs).
      GIT_SHA: ${GIT_SHA:-ci-local}
      BUILD_TIME: ${BUILD_TIME:-ci-local}
      VERSION: ${VERSION:-ci-local}
  # Start only after both backing stores report healthy.
  depends_on:
    postgres:
      condition: service_healthy
    redis:
      condition: service_healthy
  ports:
    - "8080:8080"
  environment:
    # Required by config.Load (see internal/config/config.go::Load).
    DATABASE_URL: postgres://postgres:postgres@postgres:5432/instant_platform?sslmode=disable
    # 64-hex = 32 raw bytes — matches AES-256-GCM key requirement. Public
    # test value, never reused outside this compose stack.
    AES_KEY: 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
    JWT_SECRET: ci-test-jwt-secret-not-used-in-prod
    # ENVIRONMENT=development is what unlocks the http://localhost:5173,
    # :3000, :5174 origins in the CORS allowlist (router.go ~L237). Without
    # this, the Playwright spec's cross-origin POST would be blocked by
    # CORS and silently pass the wrong contract.
    ENVIRONMENT: development
    REDIS_URL: redis://redis:6379
    # Disable expensive optional providers. None of these are on the auth
    # surface so we set them to "noop"/empty equivalents.
    INSTANT_ENABLED_SERVICES: redis,postgres
    # PostgresCustomersURL is required by config.Load (default points at a
    # k8s DNS name that doesn't resolve here). Point at the same Postgres
    # — the auth contract doesn't exercise customer-DB provisioning so
    # whether the URL works is irrelevant; we just need config.Load to
    # accept it.
    POSTGRES_CUSTOMERS_URL: postgres://postgres:postgres@postgres:5432/instant_platform?sslmode=disable
    # Skip the geo-IP DB lookup (no MMDB volume in this stack). middleware
    # GeoEnrich no-ops when the DB pointer is nil.
    GEOLITE2_DB_PATH: /tmp/no-such-geolite2.mmdb
  healthcheck:
    # /healthz returns 200 once migrations + DB ping succeed. The wget is
    # alpine-bundled so no extra package install. The body must contain
    # `"ok":true` — the same predicate the CI workflow polls for — rather
    # than any "ok" substring, which `"ok":false` would also satisfy.
    test:
      - CMD-SHELL
      - >-
        wget -q -O- http://localhost:8080/healthz | grep -q '"ok":true' || exit 1
    interval: 3s
    timeout: 3s
    retries: 40
    start_period: 10s
27 changes: 27 additions & 0 deletions e2e/browser/playwright.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,33 @@ export default defineConfig({
name: 'chromium',
use: { ...devices['Desktop Chrome'] },
},
{
// Layer-2 docker-compose auth-contract gate
// (tests/auth-contract-local.spec.ts) — needs Chromium's Local /
// Private Network Access checks disabled because both the document
// origin (http://localhost:5173, stubbed) and the api (http://localhost
// :8080) live in the loopback address space, and Chromium blocks
// even loopback→loopback fetches as a CORS pre-PNA "permission denied"
// when there is no Access-Control-Allow-Private-Network header.
// PROD does not hit this case (instanode.dev → api.instanode.dev are
// both public addresses), so the PNA disable is strictly a localhost
// shim — it does NOT weaken the contract under test, which is the
// CORS allow-origin + allow-credentials response from the api.
name: 'chromium-compose-pna',
testMatch: /auth-contract-local\.spec\.ts/,
use: {
...devices['Desktop Chrome'],
launchOptions: {
args: [
// Disable the full family of PNA / LNA blocking features. Names
// have shifted across Chromium versions (PrivateNetworkAccess*
// → LocalNetworkAccessChecks) so we list both — unknown names
// are silently ignored by Chromium, so over-listing is safe.
'--disable-features=LocalNetworkAccessChecks,PrivateNetworkAccessSendPreflights,PrivateNetworkAccessRespectPreflightResults,BlockInsecurePrivateNetworkRequests,PrivateNetworkAccessPermissionPrompt',
],
},
},
},
],
// No webServer — the k8s API is already running.
});
Loading
Loading