diff --git a/.env.example b/.env.example index a80cbae..11e47a3 100644 --- a/.env.example +++ b/.env.example @@ -13,4 +13,5 @@ LOG_LEVEL="info" TRUST_PROXY=0 DOCS_ENABLED=true METRICS_ENABLED=false +METRICS_AUTH_TOKEN="" BCRYPT_ROUNDS=10 diff --git a/.env.production.example b/.env.production.example index 096f9e7..0bc206e 100644 --- a/.env.production.example +++ b/.env.production.example @@ -17,5 +17,6 @@ RATE_LIMIT_MAX_REQUESTS=100 LOG_LEVEL="info" TRUST_PROXY=1 DOCS_ENABLED=true -METRICS_ENABLED=true +METRICS_ENABLED=false +METRICS_AUTH_TOKEN="" BCRYPT_ROUNDS=10 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 983b203..2ae6664 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -29,6 +29,9 @@ concurrency: permissions: contents: read +env: + RAILWAY_CLI_VERSION: 4.31.0 + jobs: resolve-context: name: resolve deployment context @@ -237,7 +240,7 @@ jobs: node-version: 20 - name: Install Railway CLI - run: npm install --global @railway/cli@latest + run: npm install --global @railway/cli@${RAILWAY_CLI_VERSION} - name: Deploy to Railway env: diff --git a/README.md b/README.md index 3b4d775..26d9679 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,7 @@ LOG_LEVEL="info" TRUST_PROXY=0 DOCS_ENABLED=true METRICS_ENABLED=false +METRICS_AUTH_TOKEN="" BCRYPT_ROUNDS=10 ``` @@ -206,6 +207,7 @@ Deployment automation is implemented through [`.github/workflows/deploy.yml`](./ - manual dispatch for intentional non-production deployments - exact-ref verification before any deployment - smoke validation for `/health`, `/ready`, and `/docs.json` +- a pinned Railway CLI version for deterministic release promotion Deployment setup material: @@ -229,6 +231,8 @@ Enable metrics locally with `METRICS_ENABLED=true` and expose: - `GET /metrics` +If metrics are enabled on any non-local environment, prefer setting `METRICS_AUTH_TOKEN` or keeping the route private at the network layer instead of exposing raw Prometheus output publicly. 
+ Local observability assets: - [`docs/observability.md`](./docs/observability.md) diff --git a/docs/deployment/railway.md b/docs/deployment/railway.md index 918c733..fedb4ce 100644 --- a/docs/deployment/railway.md +++ b/docs/deployment/railway.md @@ -52,6 +52,7 @@ At minimum, configure: On Railway, define `DATABASE_URL` and `REDIS_URL` on the `auth-api` service itself by referencing the backing services, rather than assuming those values are shared automatically across services. `TRUST_PROXY=1` is recommended for Railway because the service sits behind a proxy. +`METRICS_ENABLED=false` is the safer production default unless the metrics route stays private or is protected with `METRICS_AUTH_TOKEN`. ## GitHub Environments and secrets @@ -115,6 +116,8 @@ The workflow now has four explicit phases: The workflow clears the default GitHub Actions `CI=true` value for the deploy step so Railway waits for the deployment result instead of switching to build-only CI mode. +The Railway CLI version is pinned in the workflow on purpose. Update that version deliberately, in reviewable code, rather than pulling `latest` during a production promotion. + Concurrency is grouped by environment, not by a single hardcoded production bucket, so staging and production deploy queues remain isolated. ## Railway config as code @@ -157,6 +160,6 @@ Recommended manual configuration in GitHub: Branch protection should continue to require the `quality` and `integration` jobs from `.github/workflows/ci.yml` for `main`. -## Current limitation +## Current production demo -The repository automation is ready for deployment, but an actual public demo URL still depends on the Railway project existing and the required GitHub Environment secrets being configured correctly. +The public demo is live at `https://auth-api-production-a97b.up.railway.app`. 
/**
 * Reports whether `durationToMs` accepts the given value.
 * Any exception raised by the parser is treated as "unsupported".
 */
const isSupportedDuration = (candidate: string): boolean => {
  try {
    durationToMs(candidate);
  } catch {
    return false;
  }

  return true;
};

/**
 * Schema for duration-valued environment variables: a trimmed, non-empty
 * string that `durationToMs` can parse.
 */
const durationString = z
  .string()
  .trim()
  .min(1)
  .refine(
    isSupportedDuration,
    "must use a supported duration format such as 15m, 7d, 60s, or 1000",
  );

/**
 * Normalizes the raw METRICS_AUTH_TOKEN value before validation.
 * Non-string input is passed through untouched; strings are trimmed and a
 * blank result becomes `undefined` so the variable counts as "not set".
 */
const normalizeOptionalToken = (raw: unknown): unknown => {
  if (typeof raw !== "string") {
    return raw;
  }

  const token = raw.trim();
  if (token.length === 0) {
    return undefined;
  }

  return token;
};

/**
 * Optional bearer token protecting the metrics route. When present it must
 * be at least 16 characters long after trimming.
 */
const optionalMetricsAuthToken = z.preprocess(
  normalizeOptionalToken,
  z.string().min(16, "METRICS_AUTH_TOKEN must have at least 16 characters").optional(),
);
/** Per-key counter state kept by the in-memory fail-soft rate-limit store. */
type MemoryBucket = {
  /** Number of requests counted in the current window. */
  count: number;
  /** Timestamp after which the bucket's window is considered over. */
  expiresAt: number;
  /** Timestamp of the most recent write, used to pick an eviction victim. */
  updatedAt: number;
};

// Explicit type arguments keep keys and values checked; a bare `new Map()`
// would be inferred as Map<any, any>.
const memoryBuckets = new Map<string, MemoryBucket>();
const DEFAULT_MEMORY_MAX_KEYS = 10_000;

/**
 * Removes every bucket whose window has ended.
 *
 * @param now - Current timestamp, in the same unit as `MemoryBucket.expiresAt`.
 */
const pruneExpiredMemoryBuckets = (now: number): void => {
  // Deleting the entry currently being visited is safe while iterating a Map.
  for (const [key, bucket] of memoryBuckets.entries()) {
    if (now > bucket.expiresAt) {
      memoryBuckets.delete(key);
    }
  }
};

/**
 * Removes the single bucket with the smallest `updatedAt`.
 * Does nothing when the store is empty.
 */
const evictOldestMemoryBucket = (): void => {
  let oldestKey: string | null = null;
  let oldestUpdatedAt = Number.POSITIVE_INFINITY;

  for (const [key, bucket] of memoryBuckets.entries()) {
    if (bucket.updatedAt < oldestUpdatedAt) {
      oldestUpdatedAt = bucket.updatedAt;
      oldestKey = key;
    }
  }

  // Compare against null explicitly: "" is a valid key, and a truthiness
  // check would leave that bucket in place and let the store exceed its cap.
  if (oldestKey !== null) {
    memoryBuckets.delete(oldestKey);
  }
};
consumeMemory = ( + key: string, + windowMs: number, + now: number, + memoryMaxKeys: number, +): number => { + if (!memoryBuckets.has(key) && memoryBuckets.size >= memoryMaxKeys) { + pruneExpiredMemoryBuckets(now); + + if (!memoryBuckets.has(key) && memoryBuckets.size >= memoryMaxKeys) { + evictOldestMemoryBucket(); + } + } -const consumeMemory = (key: string, windowMs: number, now: number): number => { const current = memoryBuckets.get(key) ?? { count: 0, expiresAt: now + windowMs, + updatedAt: now, }; if (now > current.expiresAt) { @@ -30,6 +71,7 @@ const consumeMemory = (key: string, windowMs: number, now: number): number => { } current.count += 1; + current.updatedAt = now; memoryBuckets.set(key, current); return current.count; @@ -58,6 +100,7 @@ export function createRateLimiter({ maxRequests = env.RATE_LIMIT_MAX_REQUESTS, windowMs = env.RATE_LIMIT_WINDOW_MS, resolveKey, + memoryMaxKeys = DEFAULT_MEMORY_MAX_KEYS, }: RateLimiterOptions) { return async function rateLimiter( req: Request, @@ -71,7 +114,7 @@ export function createRateLimiter({ try { const count = redisEnabled ? await consumeRedis(key, windowMs) - : consumeMemory(key, windowMs, now); + : consumeMemory(key, windowMs, now, memoryMaxKeys); if (count > maxRequests) { authMetrics.recordRateLimitHit(bucket, redisEnabled ? "redis" : "memory"); @@ -90,7 +133,7 @@ export function createRateLimiter({ req.log.warn({ key, error }, "rate_limiter_fallback"); - const count = consumeMemory(key, windowMs, now); + const count = consumeMemory(key, windowMs, now, memoryMaxKeys); if (count > maxRequests) { authMetrics.recordRateLimitHit(bucket, "memory"); return next( @@ -111,3 +154,13 @@ export const authMutationRateLimiter = createRateLimiter({ bucket: "auth", resolveKey: (req) => req.ip || "global", }); + +// Test hooks for the in-memory fail-soft store. 
/**
 * Extracts the credential from an `Authorization: Bearer <token>` header value.
 *
 * The scheme is matched case-insensitively and may be separated from the
 * token by any run of whitespace, not only a single space. Surrounding
 * whitespace on the header value and on the token is ignored.
 *
 * @param authorization - Raw header value, or `undefined` when the header is absent.
 * @returns The token, or `null` when the header is missing, blank, uses a
 *   scheme other than Bearer, or carries no token.
 */
export function extractBearerToken(authorization: string | undefined): string | null {
  if (!authorization) {
    return null;
  }

  const trimmed = authorization.trim();

  // Locate the first whitespace character of any kind. Looking only for " "
  // would reject tab-separated headers that a `\s+` based match accepts.
  const separatorIndex = trimmed.search(/\s/);
  if (separatorIndex === -1) {
    // Blank value, or a scheme with nothing after it.
    return null;
  }

  const scheme = trimmed.slice(0, separatorIndex);
  if (scheme.toLowerCase() !== "bearer") {
    return null;
  }

  // trim() drops the rest of the separator run when it is longer than one character.
  const token = trimmed.slice(separatorIndex + 1).trim();
  return token.length > 0 ? token : null;
}
a/tests/controllers/metrics.controller.test.ts b/tests/controllers/metrics.controller.test.ts index d65495d..f975abb 100644 --- a/tests/controllers/metrics.controller.test.ts +++ b/tests/controllers/metrics.controller.test.ts @@ -34,6 +34,40 @@ describe("metricsController", () => { expect(response.json).toHaveBeenCalledWith({ message: "metrics disabled" }); }); + it("returns 401 when metrics auth is configured and the bearer token is missing", async () => { + vi.doMock("../../src/metrics/authMetrics", () => ({ + metricsEnabled: true, + metricsContentType: "text/plain; version=0.0.4; charset=utf-8", + renderMetrics: vi.fn(), + })); + + vi.doMock("../../src/config/env", () => ({ + env: { + METRICS_AUTH_TOKEN: "metrics-auth-token", + }, + })); + + const { metrics } = await import("../../src/controllers/metricsController"); + const response = createResponse(); + + await metrics( + { + correlationId: "req-123", + header: vi.fn(() => undefined), + } as never, + response as never, + ); + + expect(response.status).toHaveBeenCalledWith(401); + expect(response.json).toHaveBeenCalledWith({ + error: { + code: "METRICS_AUTHORIZATION_REQUIRED", + message: "metrics authorization is required", + correlationId: "req-123", + }, + }); + }); + it("returns Prometheus metrics when enabled", async () => { const renderMetrics = vi .fn() @@ -45,10 +79,25 @@ describe("metricsController", () => { renderMetrics, })); + vi.doMock("../../src/config/env", () => ({ + env: { + METRICS_AUTH_TOKEN: "metrics-auth-token", + }, + })); + const { metrics } = await import("../../src/controllers/metricsController"); const response = createResponse(); - await metrics({} as never, response as never); + await metrics( + { + header: vi.fn((name: string) => + name.toLowerCase() === "authorization" + ? 
"Bearer metrics-auth-token" + : undefined, + ), + } as never, + response as never, + ); expect(response.setHeader).toHaveBeenCalledWith( "Content-Type", diff --git a/tests/middlewares/rateLimiter.middleware.test.ts b/tests/middlewares/rateLimiter.middleware.test.ts index 43c1435..ccc3dc3 100644 --- a/tests/middlewares/rateLimiter.middleware.test.ts +++ b/tests/middlewares/rateLimiter.middleware.test.ts @@ -42,7 +42,10 @@ describe("rateLimiter", () => { }); it("blocks after exceeding the memory fallback limit", async () => { - const { createRateLimiter } = await loadRateLimiter({ redisEnabled: false }); + const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({ + redisEnabled: false, + }); + __rateLimiterInternals.clearMemoryBuckets(); const limiter = createRateLimiter({ bucket: "test", maxRequests: 1, @@ -63,10 +66,11 @@ describe("rateLimiter", () => { }); it("falls back to memory when redis errors", async () => { - const { createRateLimiter } = await loadRateLimiter({ + const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({ redisEnabled: true, incr: () => Promise.reject(new Error("redis down")), }); + __rateLimiterInternals.clearMemoryBuckets(); const limiter = createRateLimiter({ bucket: "test", maxRequests: 2, @@ -88,7 +92,7 @@ describe("rateLimiter", () => { }); it("records a rate-limit hit when redis-backed limiting blocks", async () => { - const { createRateLimiter } = await loadRateLimiter({ + const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({ redisEnabled: true, incr: (() => { let count = 1; @@ -96,6 +100,7 @@ describe("rateLimiter", () => { return () => Promise.resolve(count++); })(), }); + __rateLimiterInternals.clearMemoryBuckets(); const limiter = createRateLimiter({ bucket: "auth", maxRequests: 1, @@ -115,4 +120,24 @@ describe("rateLimiter", () => { expect(blocked.mock.calls[0]?.[0]?.code).toBe("TOO_MANY_REQUESTS"); expect(recordRateLimitHit).toHaveBeenCalledWith("auth", "redis"); 
}); + + it("caps the in-memory fallback store under high-cardinality traffic", async () => { + const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({ + redisEnabled: false, + }); + __rateLimiterInternals.clearMemoryBuckets(); + const limiter = createRateLimiter({ + bucket: "test", + maxRequests: 10, + windowMs: 60_000, + memoryMaxKeys: 2, + resolveKey: (req) => req.ip || "global", + }); + + await limiter({ ip: "10.10.0.10", log: { warn: vi.fn() } } as never, {} as never, vi.fn()); + await limiter({ ip: "10.10.0.11", log: { warn: vi.fn() } } as never, {} as never, vi.fn()); + await limiter({ ip: "10.10.0.12", log: { warn: vi.fn() } } as never, {} as never, vi.fn()); + + expect(__rateLimiterInternals.getMemoryBucketCount()).toBe(2); + }); });