diff --git a/.env.example b/.env.example
index a80cbae..11e47a3 100644
--- a/.env.example
+++ b/.env.example
@@ -13,4 +13,5 @@ LOG_LEVEL="info"
 TRUST_PROXY=0
 DOCS_ENABLED=true
 METRICS_ENABLED=false
+METRICS_AUTH_TOKEN=""
 BCRYPT_ROUNDS=10
diff --git a/.env.production.example b/.env.production.example
index 096f9e7..0bc206e 100644
--- a/.env.production.example
+++ b/.env.production.example
@@ -17,5 +17,6 @@ RATE_LIMIT_MAX_REQUESTS=100
 LOG_LEVEL="info"
 TRUST_PROXY=1
 DOCS_ENABLED=true
-METRICS_ENABLED=true
+METRICS_ENABLED=false
+METRICS_AUTH_TOKEN=""
 BCRYPT_ROUNDS=10
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 983b203..2ae6664 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -29,6 +29,9 @@ concurrency:
 permissions:
   contents: read
 
+env:
+  RAILWAY_CLI_VERSION: "4.31.0" # quoted so the pin stays a string even if a future version looks numeric (e.g. 4.30)
+
 jobs:
   resolve-context:
     name: resolve deployment context
@@ -237,7 +240,7 @@ jobs:
           node-version: 20
 
       - name: Install Railway CLI
-        run: npm install --global @railway/cli@latest
+        run: npm install --global @railway/cli@${RAILWAY_CLI_VERSION}
 
       - name: Deploy to Railway
         env:
diff --git a/README.md b/README.md
index 3b4d775..26d9679 100644
--- a/README.md
+++ b/README.md
@@ -174,6 +174,7 @@ LOG_LEVEL="info"
 TRUST_PROXY=0
 DOCS_ENABLED=true
 METRICS_ENABLED=false
+METRICS_AUTH_TOKEN=""
 BCRYPT_ROUNDS=10
 ```
 
@@ -206,6 +207,7 @@ Deployment automation is implemented through [`.github/workflows/deploy.yml`](./
 - manual dispatch for intentional non-production deployments
 - exact-ref verification before any deployment
 - smoke validation for `/health`, `/ready`, and `/docs.json`
+- a pinned Railway CLI version for deterministic release promotion
 
 Deployment setup material:
 
@@ -229,6 +231,8 @@ Enable metrics locally with `METRICS_ENABLED=true` and expose:
 
 - `GET /metrics`
 
+If metrics are enabled on any non-local environment, prefer setting `METRICS_AUTH_TOKEN` or keeping the route private at the network layer instead of exposing raw Prometheus output publicly.
+
 Local observability assets:
 
 - [`docs/observability.md`](./docs/observability.md)
diff --git a/docs/deployment/railway.md b/docs/deployment/railway.md
index 918c733..fedb4ce 100644
--- a/docs/deployment/railway.md
+++ b/docs/deployment/railway.md
@@ -52,6 +52,7 @@ At minimum, configure:
 On Railway, define `DATABASE_URL` and `REDIS_URL` on the `auth-api` service itself by referencing the backing services, rather than assuming those values are shared automatically across services.
 
 `TRUST_PROXY=1` is recommended for Railway because the service sits behind a proxy.
+`METRICS_ENABLED=false` is the safer production default unless the metrics route stays private or is protected with `METRICS_AUTH_TOKEN`.
 
 ## GitHub Environments and secrets
 
@@ -115,6 +116,8 @@ The workflow now has four explicit phases:
 
 The workflow clears the default GitHub Actions `CI=true` value for the deploy step so Railway waits for the deployment result instead of switching to build-only CI mode.
 
+The Railway CLI version is pinned in the workflow on purpose. Update that version deliberately, in reviewable code, rather than pulling `latest` during a production promotion.
+
 Concurrency is grouped by environment, not by a single hardcoded production bucket, so staging and production deploy queues remain isolated.
 
 ## Railway config as code
@@ -157,6 +160,6 @@ Recommended manual configuration in GitHub:
 
 Branch protection should continue to require the `quality` and `integration` jobs from `.github/workflows/ci.yml` for `main`.
 
-## Current limitation
+## Current production demo
 
-The repository automation is ready for deployment, but an actual public demo URL still depends on the Railway project existing and the required GitHub Environment secrets being configured correctly.
+The public demo is live at `https://auth-api-production-a97b.up.railway.app`.
diff --git a/docs/observability.md b/docs/observability.md
index 6e689ef..a008101 100644
--- a/docs/observability.md
+++ b/docs/observability.md
@@ -12,6 +12,11 @@ This repository exposes a minimal Prometheus-compatible metrics surface focused
 
 The metrics endpoint is disabled by default and must be enabled explicitly with `METRICS_ENABLED=true`.
 
+For non-local environments, prefer one of these patterns:
+
+- keep `/metrics` on a private network only
+- set `METRICS_AUTH_TOKEN` (at least 16 characters) and have the scraper send it as `Authorization: Bearer <token>`
+
 ## Run the service with metrics enabled
 
 Start the application locally with metrics exposed on `/metrics`:
@@ -20,6 +25,12 @@ Start the application locally with metrics exposed on `/metrics`:
 METRICS_ENABLED=true npm run dev
 ```
 
+If you want to exercise the authenticated path locally, provide a token:
+
+```bash
+METRICS_ENABLED=true METRICS_AUTH_TOKEN=local-observability-token npm run dev
+```
+
 If you are using the full local stack, ensure PostgreSQL and Redis are running first:
 
 ```bash
diff --git a/src/config/env.ts b/src/config/env.ts
index 961e90a..cdb5e10 100644
--- a/src/config/env.ts
+++ b/src/config/env.ts
@@ -1,5 +1,6 @@
 import { config as loadDotenv } from "dotenv";
 import { z } from "zod";
+import { durationToMs } from "../utils/duration";
 
 loadDotenv({ path: ".env", override: false, quiet: true });
 
@@ -33,6 +34,28 @@ const booleanFromEnv = z.preprocess((value) => {
   return value;
 }, z.boolean());
 
+/** Reports whether `durationToMs` accepts the value without throwing. */
+const isParsableDuration = (candidate: string): boolean => {
+  try {
+    durationToMs(candidate);
+  } catch {
+    return false;
+  }
+  return true;
+};
+
+const durationHint = "must use a supported duration format such as 15m, 7d, 60s, or 1000";
+const durationString = z.string().trim().min(1).refine(isParsableDuration, durationHint);
+
+// Blank or whitespace-only tokens count as unset; any other string is trimmed before validation.
+const optionalMetricsAuthToken = z.preprocess(
+  (raw) => {
+    if (typeof raw !== "string") return raw;
+    return raw.trim() === "" ? undefined : raw.trim();
+  },
+  z.string().min(16, "METRICS_AUTH_TOKEN must have at least 16 characters").optional(),
+);
+
 const envSchema = z.object({
   NODE_ENV: z.enum(["development", "test", "production"]).default("development"),
   PORT: z.coerce.number().int().positive().default(3000),
@@ -45,8 +68,8 @@ const envSchema = z.object({
     .string()
     .trim()
     .min(32, "REFRESH_TOKEN_SECRET must have at least 32 characters"),
-  ACCESS_TOKEN_EXPIRES_IN: z.string().trim().min(1).default("15m"),
-  REFRESH_TOKEN_EXPIRES_IN: z.string().trim().min(1).default("7d"),
+  ACCESS_TOKEN_EXPIRES_IN: durationString.default("15m"),
+  REFRESH_TOKEN_EXPIRES_IN: durationString.default("7d"),
   JWT_ISSUER: z.string().trim().min(1).default("auth-api"),
   JWT_AUDIENCE: z.string().trim().min(1).default("auth-api-clients"),
   REDIS_URL: optionalString,
@@ -58,6 +81,7 @@ const envSchema = z.object({
   TRUST_PROXY: z.coerce.number().int().min(0).default(0),
   DOCS_ENABLED: booleanFromEnv.default(true),
   METRICS_ENABLED: booleanFromEnv.default(false),
+  METRICS_AUTH_TOKEN: optionalMetricsAuthToken,
   BCRYPT_ROUNDS: z.coerce.number().int().min(8).max(15).default(10),
 });
 
diff --git a/src/controllers/metricsController.ts b/src/controllers/metricsController.ts
index 581cc98..d58aaeb 100644
--- a/src/controllers/metricsController.ts
+++ b/src/controllers/metricsController.ts
@@ -4,12 +4,52 @@ import {
   metricsEnabled,
   renderMetrics,
 } from "../metrics/authMetrics";
+import { createHash, timingSafeEqual } from "node:crypto";
+import { env } from "../config/env";
+import { extractBearerToken } from "../utils/bearerToken";
 
+/**
+ * Checks a presented scraper token against the configured one in constant time.
+ *
+ * Both values are hashed with SHA-256 first so the buffers handed to
+ * timingSafeEqual always have equal length (it throws otherwise) and the
+ * comparison time does not reveal how many leading characters matched.
+ */
+function isExpectedMetricsToken(presented: string | null, expected: string): boolean {
+  if (presented === null) {
+    return false;
+  }
+
+  const digest = (value: string): Buffer => createHash("sha256").update(value).digest();
+  return timingSafeEqual(digest(presented), digest(expected));
+}
+
-export async function metrics(_req: Request, res: Response) {
+/**
+ * Metrics route handler: answers 404 while metrics are disabled and 401 when
+ * METRICS_AUTH_TOKEN is configured but the request lacks the matching bearer token.
+ */
+export async function metrics(req: Request, res: Response) {
   if (!metricsEnabled) {
     return res.status(404).json({ message: "metrics disabled" });
   }
 
+  // Token auth is opt-in: the check only runs when METRICS_AUTH_TOKEN is set.
+  if (
+    env.METRICS_AUTH_TOKEN &&
+    !isExpectedMetricsToken(
+      extractBearerToken(req.header("authorization")),
+      env.METRICS_AUTH_TOKEN,
+    )
+  ) {
+    return res.status(401).json({
+      error: {
+        code: "METRICS_AUTHORIZATION_REQUIRED",
+        message: "metrics authorization is required",
+        correlationId: req.correlationId,
+      },
+    });
+  }
+
   res.setHeader("Content-Type", metricsContentType);
   res.setHeader("Cache-Control", "no-store");
 
diff --git a/src/middlewares/authMiddleware.ts b/src/middlewares/authMiddleware.ts
index faeea0e..aaa7005 100644
--- a/src/middlewares/authMiddleware.ts
+++ b/src/middlewares/authMiddleware.ts
@@ -3,6 +3,7 @@ import { SessionStatus } from "@prisma/client";
 import AppError from "../errors/AppError";
 import tokenService from "../services/tokenService";
 import sessionRepository from "../repositories/sessionRepository";
+import { extractBearerToken } from "../utils/bearerToken";
 
 export default async function authMiddleware(
   req: Request,
@@ -21,8 +22,7 @@ export default async function authMiddleware(
     );
   }
 
-  const match = authorization.match(/^Bearer\s+(.+)$/i);
-  const token = match?.[1];
+  const token = extractBearerToken(authorization);
   if (!token) {
     return next(
       new AppError({
diff --git a/src/middlewares/rateLimiter.ts b/src/middlewares/rateLimiter.ts
index 0a498a9..a0a39ac 100644
--- a/src/middlewares/rateLimiter.ts
+++ b/src/middlewares/rateLimiter.ts
@@ -9,19 +9,60 @@ type RateLimiterOptions = {
   maxRequests?: number;
   windowMs?: number;
   resolveKey: (req: Request) => string;
+  memoryMaxKeys?: number;
 };
 
 type MemoryBucket = {
   count: number;
   expiresAt: number;
+  updatedAt: number;
 };
 
 const memoryBuckets = new Map<string, MemoryBucket>();
+const DEFAULT_MEMORY_MAX_KEYS = 10_000;
+
+// Drops every bucket whose `expiresAt` is strictly earlier than `now`.
+const pruneExpiredMemoryBuckets = (now: number): void => {
+  memoryBuckets.forEach((bucket, key) => {
+    const expired = now > bucket.expiresAt;
+    if (expired) memoryBuckets.delete(key);
+  });
+};
+
+// Evicts the bucket with the smallest `updatedAt`; does nothing when the store is empty.
+const evictOldestMemoryBucket = (): void => {
+  let oldestKey: string | null = null;
+  let oldestUpdatedAt = Number.POSITIVE_INFINITY;
+
+  for (const [key, bucket] of memoryBuckets.entries()) {
+    if (bucket.updatedAt < oldestUpdatedAt) {
+      oldestUpdatedAt = bucket.updatedAt;
+      oldestKey = key;
+    }
+  }
+
+  // Compare with null rather than truthiness: "" is a legitimate Map key.
+  if (oldestKey !== null) memoryBuckets.delete(oldestKey);
+};
+
+const consumeMemory = (
+  key: string,
+  windowMs: number,
+  now: number,
+  memoryMaxKeys: number,
+): number => {
+  if (!memoryBuckets.has(key) && memoryBuckets.size >= memoryMaxKeys) {
+    pruneExpiredMemoryBuckets(now);
+
+    if (!memoryBuckets.has(key) && memoryBuckets.size >= memoryMaxKeys) {
+      evictOldestMemoryBucket();
+    }
+  }
 
-const consumeMemory = (key: string, windowMs: number, now: number): number => {
   const current = memoryBuckets.get(key) ?? {
     count: 0,
     expiresAt: now + windowMs,
+    updatedAt: now,
   };
 
   if (now > current.expiresAt) {
@@ -30,6 +71,7 @@ const consumeMemory = (key: string, windowMs: number, now: number): number => {
   }
 
   current.count += 1;
+  current.updatedAt = now;
   memoryBuckets.set(key, current);
 
   return current.count;
@@ -58,6 +100,7 @@ export function createRateLimiter({
   maxRequests = env.RATE_LIMIT_MAX_REQUESTS,
   windowMs = env.RATE_LIMIT_WINDOW_MS,
   resolveKey,
+  memoryMaxKeys = DEFAULT_MEMORY_MAX_KEYS,
 }: RateLimiterOptions) {
   return async function rateLimiter(
     req: Request,
@@ -71,7 +114,7 @@ export function createRateLimiter({
     try {
       const count = redisEnabled
         ? await consumeRedis(key, windowMs)
-        : consumeMemory(key, windowMs, now);
+        : consumeMemory(key, windowMs, now, memoryMaxKeys);
 
       if (count > maxRequests) {
         authMetrics.recordRateLimitHit(bucket, redisEnabled ? "redis" : "memory");
@@ -90,7 +133,7 @@ export function createRateLimiter({
 
       req.log.warn({ key, error }, "rate_limiter_fallback");
 
-      const count = consumeMemory(key, windowMs, now);
+      const count = consumeMemory(key, windowMs, now, memoryMaxKeys);
       if (count > maxRequests) {
         authMetrics.recordRateLimitHit(bucket, "memory");
         return next(
@@ -111,3 +154,13 @@ export const authMutationRateLimiter = createRateLimiter({
   bucket: "auth",
   resolveKey: (req) => req.ip || "global",
 });
+
+// Test hooks for the in-memory fail-soft store.
+// Exported so tests can reset and inspect the module-level bucket map between cases.
+export const __rateLimiterInternals = {
+  /** Drops every in-memory bucket. */
+  clearMemoryBuckets: (): void => memoryBuckets.clear(),
+
+  /** Returns how many keys the in-memory store currently holds. */
+  getMemoryBucketCount: (): number => memoryBuckets.size,
+};
diff --git a/src/utils/bearerToken.ts b/src/utils/bearerToken.ts
new file mode 100644
index 0000000..22e6835
--- /dev/null
+++ b/src/utils/bearerToken.ts
@@ -0,0 +1,20 @@
+const BEARER_CREDENTIALS = /^Bearer\s+(\S.*)$/i;
+
+/**
+ * Extracts the credentials from an `Authorization: Bearer <token>` header value.
+ *
+ * The scheme is matched case-insensitively and may be separated from the token
+ * by any run of whitespace (spaces or tabs). Surrounding whitespace is ignored.
+ *
+ * @param authorization - Raw header value, or `undefined` when the header is absent.
+ * @returns The trimmed token, or `null` when the header is missing, blank, uses a
+ *   different scheme, or carries no token.
+ */
+export function extractBearerToken(authorization: string | undefined): string | null {
+  if (!authorization) {
+    return null;
+  }
+
+  const match = BEARER_CREDENTIALS.exec(authorization.trim());
+  return match?.[1] ?? null;
+}
diff --git a/tests/config/env.test.ts b/tests/config/env.test.ts
index c8898a8..f52abe3 100644
--- a/tests/config/env.test.ts
+++ b/tests/config/env.test.ts
@@ -27,6 +27,7 @@ describe("env config", () => {
     expect(env.LOG_LEVEL).toBe("silent");
     expect(env.DOCS_ENABLED).toBe(true);
     expect(env.METRICS_ENABLED).toBe(false);
+    expect(env.METRICS_AUTH_TOKEN).toBeUndefined();
   });
 
   it("fails fast when secrets are invalid", async () => {
@@ -39,4 +40,17 @@ describe("env config", () => {
       /ACCESS_TOKEN_SECRET|REFRESH_TOKEN_SECRET/,
     );
   });
+
+  it("fails fast when token durations use an unsupported format", async () => {
+    process.env.NODE_ENV = "test";
+    process.env.DATABASE_URL = "postgresql://auth_user:auth_password@localhost:5432/auth_api";
+    process.env.ACCESS_TOKEN_SECRET = "test-access-secret-with-at-least-thirty-two-characters";
+    process.env.REFRESH_TOKEN_SECRET =
+      "test-refresh-secret-with-at-least-thirty-two-characters";
+    process.env.ACCESS_TOKEN_EXPIRES_IN = "15 minutes";
+
+    await expect(import("../../src/config/env")).rejects.toThrow(
+      /ACCESS_TOKEN_EXPIRES_IN/,
+    );
+  });
 });
diff --git a/tests/controllers/metrics.controller.test.ts b/tests/controllers/metrics.controller.test.ts
index d65495d..f975abb 100644
--- a/tests/controllers/metrics.controller.test.ts
+++ b/tests/controllers/metrics.controller.test.ts
@@ -34,6 +34,40 @@ describe("metricsController", () => {
     expect(response.json).toHaveBeenCalledWith({ message: "metrics disabled" });
   });
 
+  it("returns 401 when metrics auth is configured and the bearer token is missing", async () => {
+    vi.doMock("../../src/metrics/authMetrics", () => ({
+      metricsEnabled: true,
+      metricsContentType: "text/plain; version=0.0.4; charset=utf-8",
+      renderMetrics: vi.fn(),
+    }));
+
+    vi.doMock("../../src/config/env", () => ({
+      env: {
+        METRICS_AUTH_TOKEN: "metrics-auth-token",
+      },
+    }));
+
+    const { metrics } = await import("../../src/controllers/metricsController");
+    const response = createResponse();
+
+    await metrics(
+      {
+        correlationId: "req-123",
+        header: vi.fn(() => undefined),
+      } as never,
+      response as never,
+    );
+
+    expect(response.status).toHaveBeenCalledWith(401);
+    expect(response.json).toHaveBeenCalledWith({
+      error: {
+        code: "METRICS_AUTHORIZATION_REQUIRED",
+        message: "metrics authorization is required",
+        correlationId: "req-123",
+      },
+    });
+  });
+
   it("returns Prometheus metrics when enabled", async () => {
     const renderMetrics = vi
       .fn()
@@ -45,10 +79,25 @@ describe("metricsController", () => {
       renderMetrics,
     }));
 
+    vi.doMock("../../src/config/env", () => ({
+      env: {
+        METRICS_AUTH_TOKEN: "metrics-auth-token",
+      },
+    }));
+
     const { metrics } = await import("../../src/controllers/metricsController");
     const response = createResponse();
 
-    await metrics({} as never, response as never);
+    await metrics(
+      {
+        header: vi.fn((name: string) =>
+          name.toLowerCase() === "authorization"
+            ? "Bearer metrics-auth-token"
+            : undefined,
+        ),
+      } as never,
+      response as never,
+    );
 
     expect(response.setHeader).toHaveBeenCalledWith(
       "Content-Type",
diff --git a/tests/middlewares/rateLimiter.middleware.test.ts b/tests/middlewares/rateLimiter.middleware.test.ts
index 43c1435..ccc3dc3 100644
--- a/tests/middlewares/rateLimiter.middleware.test.ts
+++ b/tests/middlewares/rateLimiter.middleware.test.ts
@@ -42,7 +42,10 @@ describe("rateLimiter", () => {
   });
 
   it("blocks after exceeding the memory fallback limit", async () => {
-    const { createRateLimiter } = await loadRateLimiter({ redisEnabled: false });
+    const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({
+      redisEnabled: false,
+    });
+    __rateLimiterInternals.clearMemoryBuckets();
     const limiter = createRateLimiter({
       bucket: "test",
       maxRequests: 1,
@@ -63,10 +66,11 @@ describe("rateLimiter", () => {
   });
 
   it("falls back to memory when redis errors", async () => {
-    const { createRateLimiter } = await loadRateLimiter({
+    const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({
       redisEnabled: true,
       incr: () => Promise.reject(new Error("redis down")),
     });
+    __rateLimiterInternals.clearMemoryBuckets();
     const limiter = createRateLimiter({
       bucket: "test",
       maxRequests: 2,
@@ -88,7 +92,7 @@ describe("rateLimiter", () => {
   });
 
   it("records a rate-limit hit when redis-backed limiting blocks", async () => {
-    const { createRateLimiter } = await loadRateLimiter({
+    const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({
       redisEnabled: true,
       incr: (() => {
         let count = 1;
@@ -96,6 +100,7 @@ describe("rateLimiter", () => {
         return () => Promise.resolve(count++);
       })(),
     });
+    __rateLimiterInternals.clearMemoryBuckets();
     const limiter = createRateLimiter({
       bucket: "auth",
       maxRequests: 1,
@@ -115,4 +120,24 @@ describe("rateLimiter", () => {
     expect(blocked.mock.calls[0]?.[0]?.code).toBe("TOO_MANY_REQUESTS");
     expect(recordRateLimitHit).toHaveBeenCalledWith("auth", "redis");
   });
+
+  it("caps the in-memory fallback store under high-cardinality traffic", async () => {
+    const { createRateLimiter, __rateLimiterInternals } = await loadRateLimiter({
+      redisEnabled: false,
+    });
+    __rateLimiterInternals.clearMemoryBuckets();
+    const limiter = createRateLimiter({
+      bucket: "test",
+      maxRequests: 10,
+      windowMs: 60_000,
+      memoryMaxKeys: 2,
+      resolveKey: (req) => req.ip || "global",
+    });
+
+    await limiter({ ip: "10.10.0.10", log: { warn: vi.fn() } } as never, {} as never, vi.fn());
+    await limiter({ ip: "10.10.0.11", log: { warn: vi.fn() } } as never, {} as never, vi.fn());
+    await limiter({ ip: "10.10.0.12", log: { warn: vi.fn() } } as never, {} as never, vi.fn());
+
+    expect(__rateLimiterInternals.getMemoryBucketCount()).toBe(2);
+  });
 });