diff --git a/packages/opencode/src/cli/cmd/run.ts b/packages/opencode/src/cli/cmd/run.ts
index dc59c4c86..e9cef524c 100644
--- a/packages/opencode/src/cli/cmd/run.ts
+++ b/packages/opencode/src/cli/cmd/run.ts
@@ -10,6 +10,15 @@ import { Filesystem } from "../../util/filesystem"
 import { createOpencodeClient, type Message, type OpencodeClient, type ToolPart } from "@opencode-ai/sdk/v2"
 import { Server } from "../../server/server"
 import { Provider } from "../../provider/provider"
+// altimate_change start — verifier-gated router (run cheap, verify, escalate)
+import { Router } from "../../router/router"
+import { Verifier } from "../../router/verifier"
+import { Verdict } from "../../router/verdict"
+import { Policy } from "../../router/policy"
+import { EquivalenceVerifier } from "../../router/equivalence-verifier"
+import { ReferenceResolver } from "../../router/reference"
+import * as Dispatcher from "../../altimate/native/dispatcher"
+// altimate_change end
 import { Agent } from "../../agent/agent"
 import { PermissionNext } from "../../permission/next"
 import { Tool } from "../../tool/tool"
@@ -816,46 +825,53 @@ You are speaking to a non-technical business executive. Follow these rules stric
         process.exit(1)
       })
 
-      if (args.command) {
-        await sdk.session.command({
-          sessionID,
-          agent,
-          model: args.model,
-          command: args.command,
-          arguments: message,
-          variant: args.variant,
-        })
-      } else {
-        const model = args.model ? Provider.parseModel(args.model) : undefined
-        await sdk.session.prompt({
-          sessionID,
-          agent,
-          model,
-          variant: args.variant,
-          parts: [...files, { type: "text", text: message }],
-          ...(audienceSystem ? { system: audienceSystem } : {}),
-        })
-      }
+      // altimate_change start — per-run finally cleanup. The verifier-gated router catches a
+      // thrown tier (router.ts) and escalates to the next tier within the SAME process; without
+      // this finally, a tier whose prompt throws would leak its SIGINT/SIGTERM/beforeExit handlers
+      // and leave its tracer active, accumulating across tiers. Cleanup now always runs.
+      try {
+        if (args.command) {
+          await sdk.session.command({
+            sessionID,
+            agent,
+            model: args.model,
+            command: args.command,
+            arguments: message,
+            variant: args.variant,
+          })
+        } else {
+          const model = args.model ? Provider.parseModel(args.model) : undefined
+          await sdk.session.prompt({
+            sessionID,
+            agent,
+            model,
+            variant: args.variant,
+            parts: [...files, { type: "text", text: message }],
+            ...(audienceSystem ? { system: audienceSystem } : {}),
+          })
+        }
 
-      // Wait for the event loop to drain (breaks when session reaches idle)
-      await loopPromise
-
-      // Remove crash handlers — trace will be finalized cleanly
-      process.removeListener("SIGINT", onSigint)
-      process.removeListener("SIGTERM", onSigterm)
-      process.removeListener("beforeExit", onBeforeExit)
-
-      // Finalize trace and save to disk
-      if (tracer) {
-        Tracer.setActive(null)
-        const tracePath = await tracer.endTrace(error)
-        if (tracePath) {
-          emit("trace_saved", { path: tracePath })
-          if (args.format !== "json" && process.stdout.isTTY) {
-            UI.println(UI.Style.TEXT_DIM + `Trace saved: ${tracePath}` + UI.Style.TEXT_NORMAL)
+        // Wait for the event loop to drain (breaks when session reaches idle)
+        await loopPromise
+      } finally {
+        // Remove crash handlers — trace will be finalized cleanly
+        process.removeListener("SIGINT", onSigint)
+        process.removeListener("SIGTERM", onSigterm)
+        process.removeListener("beforeExit", onBeforeExit)
+
+        // Finalize trace and save to disk (with `error` if the run failed)
+        if (tracer) {
+          Tracer.setActive(null)
+          const tracePath = await tracer.endTrace(error)
+          if (tracePath) {
+            emit("trace_saved", { path: tracePath })
+            if (args.format !== "json" && process.stdout.isTTY) {
+              UI.println(UI.Style.TEXT_DIM + `Trace saved: ${tracePath}` + UI.Style.TEXT_NORMAL)
+            }
           }
         }
       }
+      // altimate_change end
 
       // Write accumulated text output to file if --output was specified
       if (args.output) {
@@ -864,8 +880,190 @@ You are speaking to a non-technical business executive. Follow these rules stric
         await Bun.write(outputPath, content)
         process.stderr.write(`\n✓ Output saved to: ${outputPath}\n`)
       }
+
+      // altimate_change start — expose the session id so the router can reuse one session
+      // across tiers (escalation continues the same session instead of starting fresh).
+      return sessionID
+      // altimate_change end
+    }
+
+    // altimate_change start — verifier-gated router orchestration
+    // Deterministic-verify the dbt workspace in cwd (`dbt build`, judged by `Verifier.fromDbt`).
+    // Only gates real dbt projects; with nothing to prove it returns ok/unverifiable (no escalation).
+    async function verifyWorkspace(): Promise<Verifier.Verdict> {
+      const root = process.cwd()
+      // Fail-open but honest verdict for "could not verify" (no dbt project / dbt could not be run).
+      const unverifiable = (reason: string): Verifier.Verdict => ({
+        ok: true,
+        unverifiable: true,
+        strength: Verifier.Strength.UNVERIFIABLE,
+        decision: Verifier.Decision.OK,
+        reason,
+        checks: [],
+      })
+      if (!(await Filesystem.exists(path.join(root, "dbt_project.yml"))))
+        return unverifiable("no dbt project to verify")
+
+      // Reference-free gate: `dbt build` in `dir`, judged by Verifier. Used directly (default)
+      // and as the fallback for the equivalence verifier (greenfield / undecidable).
+      const buildVerify = async (dir: string): Promise<Verifier.Verdict> => {
+        try {
+          const proc = Bun.spawn(["dbt", "build"], { cwd: dir, stdout: "pipe", stderr: "pipe" })
+          // Hard timeout so a hung dbt (lock, prompt, runaway query) can't stall the run.
+          let timedOut = false
+          const timer = setTimeout(() => {
+            timedOut = true
+            proc.kill()
+          }, 300_000)
+          // Drain both pipes and await exit together: reading stdout to EOF before stderr can
+          // stall dbt on a full stderr pipe. `finally` clears the timer even when a read rejects.
+          const drained = Promise.all([new Response(proc.stdout).text(), new Response(proc.stderr).text(), proc.exited])
+          const [stdout, stderr, code] = await drained.finally(() => clearTimeout(timer))
+          const out = stdout + stderr
+          if (timedOut)
+            return {
+              ok: false,
+              strength: Verifier.Strength.BUILD,
+              decision: Verifier.Decision.FAILED,
+              reason: "dbt build timed out after 300s",
+              checks: [{ name: "dbt build", ok: false, detail: "timed out after 300s" }],
+            }
+          return Verifier.fromDbt(out, code)
+        } catch (e) {
+          // dbt binary missing / spawn failure → can't verify; mark unverifiable (fail-open, but honest).
+          return unverifiable(`verify skipped: ${String(e)}`)
+        }
+      }
+
+      // EXPERIMENTAL (flag-gated, default off): equivalence-backed verification in the
+      // reference-available regime — proven-equivalent vs the model's base version. Always
+      // falls back to `buildVerify` on greenfield / undecidable / any error, so it can never
+      // be less safe than the build gate. Value is gated on altimate-core dialect + schema
+      // coverage (altimate-core-internal #128 / #130); ships dormant until those land.
+      if (process.env["ALTIMATE_ROUTER_EQUIVALENCE"] === "1") {
+        try {
+          // stderr is ignored: it is never returned, and an unread pipe could fill up and stall the child.
+          const exec: ReferenceResolver.Exec = async (cmd, args, cwd) => {
+            const p = Bun.spawn([cmd, ...args], { cwd, stdout: "pipe", stderr: "ignore" })
+            const stdout = await new Response(p.stdout).text()
+            return { stdout, code: await p.exited }
+          }
+          const readCompiled = async (dir: string): Promise<Map<string, string>> => {
+            const { readdir } = await import("node:fs/promises")
+            const map = new Map<string, string>()
+            const baseDir = path.join(dir, "target", "compiled")
+            if (!(await Filesystem.exists(baseDir))) return map
+            const walk = async (d: string) => {
+              for (const e of await readdir(d, { withFileTypes: true })) {
+                const fp = path.join(d, e.name)
+                if (e.isDirectory()) await walk(fp)
+                else if (e.name.endsWith(".sql")) map.set(e.name.replace(/\.sql$/, ""), await Bun.file(fp).text())
+              }
+            }
+            await walk(baseDir)
+            return map
+          }
+          const checkoutBase = async (workdir: string, ref: string) => {
+            const dir = path.join("/tmp", `altimate-base-${Date.now()}`)
+            await exec("git", ["worktree", "add", "--detach", dir, ref], workdir)
+            return {
+              dir,
+              cleanup: async () => {
+                await exec("git", ["worktree", "remove", "--force", dir], workdir)
+              },
+            }
+          }
+          const deps = ReferenceResolver.gitDbtDeps(exec, {
+            readCompiled,
+            // Best-effort: empty schema ⇒ the engine abstains on table refs ⇒ build fallback.
+            // A warehouse schema resolver lands with the dialect coverage work.
+            buildSchema: async () => undefined,
+            checkoutBase,
+          })
+          const check: EquivalenceVerifier.CheckEquivalence = async (head, base, schema) => {
+            const r = await Dispatcher.call("altimate_core.equivalence", {
+              sql1: head,
+              sql2: base,
+              schema_context: schema as Record<string, unknown> | undefined,
+            })
+            const d = ((r as { data?: Record<string, unknown> }).data ?? {}) as {
+              equivalent?: boolean
+              validation_errors?: string[]
+              differences?: { severity?: string; description?: string }[]
+              confidence?: number
+            }
+            // No boolean verdict (e.g. missing `data`) is undecidable, not "different" — throw, don't coerce.
+            if (typeof d.equivalent !== "boolean") throw new Error("altimate_core.equivalence returned no verdict")
+            return {
+              equivalent: d.equivalent,
+              validation_errors: d.validation_errors ?? [],
+              differences: d.differences ?? [],
+              confidence: d.confidence,
+            }
+          }
+          return await EquivalenceVerifier.create(check, ReferenceResolver.create(deps), {
+            verify: buildVerify,
+          }).verify(root)
+        } catch {
+          return buildVerify(root) // the experimental path must never break the run
+        }
+      }
+
+      return buildVerify(root)
+    }
 
+    // Tier ladder: cheapest model first → verify → escalate with the failing checks → stop at the
+    // first pass. Every tier goes through the ordinary single-run path (`execute`) with that tier's
+    // model and the escalation note prepended, in the SAME workspace, so later tiers repair the prior attempt.
+    async function runRouted(sdk: OpencodeClient) {
+      // Routing needs a deterministic gate. Outside a dbt project nothing can be verified, so the
+      // cheapest tier would be accepted with no way to escalate — a silent quality downgrade.
+      // In that case do exactly one run with the user's model.
+      const hasDbtProject = await Filesystem.exists(path.join(process.cwd(), "dbt_project.yml"))
+      if (!hasDbtProject) {
+        await execute(sdk)
+        return
+      }
+      // Snapshot of the request state the tiers overwrite; put back in the `finally` below.
+      const saved = { message, model: args.model, session: args.session }
+      // ONE session for the whole ladder: the first tier's session id (returned by `execute`) is
+      // remembered and passed to every escalation tier via `args.session`, so the stronger model
+      // sees the prior attempt + the failing-check note rather than starting cold.
+      let ladderSession: string | undefined
+      const policy = Policy.resolve()
+      const tiers = await policy.tiers({ prompt: saved.message })
+      let result
+      try {
+        result = await Router.route({
+          tiers,
+          runAgent: async (model, note) => {
+            args.model = model
+            message = note ? `${note}\n\n${saved.message}` : saved.message
+            if (ladderSession) args.session = ladderSession // escalation: continue the first tier's session
+            const sid = await execute(sdk)
+            if (!ladderSession && sid) ladderSession = sid // first tier: remember its session
+          },
+          verify: verifyWorkspace,
+        })
+      } finally {
+        // Restore unconditionally — also when a tier throws — so that any downstream
+        // logging/telemetry/retry never observes the last tier's `message`, `args.model`
+        // or `args.session`.
+        message = saved.message
+        args.model = saved.model
+        args.session = saved.session
+      }
+      const envelope = Verdict.build(result, { now: new Date().toISOString() })
+      if (args.format !== "json") {
+        const tag = envelope.solved ? `✓ verified by ${envelope.solvedBy}` : "✗ unverified after all tiers"
+        UI.println(UI.Style.TEXT_INFO_BOLD + `~  router: ${tag} (policy: ${policy.source})`)
+      } else {
+        process.stdout.write(JSON.stringify({ type: "verdict", timestamp: Date.now(), ...envelope }) + EOL)
+      }
+      await Policy.reportOutcome(envelope)
+    }
+    // altimate_change end
+
     if (args.attach) {
       const headers = (() => {
         const password = args.password ?? process.env.OPENCODE_SERVER_PASSWORD
@@ -875,7 +1073,11 @@ You are speaking to a non-technical business executive. Follow these rules stric
         return { Authorization: auth }
       })()
       const sdk = createOpencodeClient({ baseUrl: args.attach, directory, headers })
-      return await execute(sdk)
+      // altimate_change start — route when enabled, else single run
+      if (Router.enabled()) await runRouted(sdk)
+      else await execute(sdk) // discard execute()'s returned session id (handler returns void)
+      return
+      // altimate_change end
     }
 
     await bootstrap(process.cwd(), async () => {
@@ -884,7 +1086,10 @@ You are speaking to a non-technical business executive. Follow these rules stric
         return Server.Default().fetch(request)
       }) as typeof globalThis.fetch
       const sdk = createOpencodeClient({ baseUrl: "http://altimate-code.internal", fetch: fetchFn })
-      await execute(sdk)
+      // altimate_change start — route when enabled, else single run
+      if (Router.enabled()) await runRouted(sdk)
+      else await execute(sdk)
+      // altimate_change end
     })
   },
 })
diff --git a/packages/opencode/src/router/README.md b/packages/opencode/src/router/README.md
new file mode 100644
index 000000000..bb6b96788
--- /dev/null
+++ b/packages/opencode/src/router/README.md
@@ -0,0 +1,90 @@
+# Verifier-gated router
+
+Run a cheap model first, verify the result deterministically, and escalate to a
+stronger model only when verification fails. Most runs finish at the cheap tier;
+the rest get a stronger attempt that receives the exact failing checks as context.
+Flag-gated (`ALTIMATE_ROUTER`), default off — the normal single-model path is unchanged.
+
+## Modules (pure, unit-tested)
+- **`verifier.ts`** — `Verifier`: a deterministic `Verdict` from `dbt build`/`dbt test`
+  output (`fromDbt`, `parseDbtSummary`, `failingNodes`). Every verdict carries a
+  **`Strength`** (`UNVERIFIABLE < BUILD < DBT_TEST < EQUIVALENCE`) and a **`Decision`**
+  (`OK | PROVEN_DIFFERENT | UNDECIDABLE | FAILED`) so consumers know *how strongly* a
+  result was proven, not just pass/fail. `Impl` is the pluggable verifier interface; the
+  default `dbtVerifier(run)` shells dbt (runner injected, fail-open). `fromEquivalence`
+  folds per-model equivalence results soundly. `ALLOW_ALL` passes everything (ungated).
+- **`equivalence-verifier.ts`** — `EquivalenceVerifier`: an optional, stronger `Impl` for
+  the *reference-available* regime (editing an existing model) — compares base↔head
+  compiled SQL via the altimate-core equivalence engine. **Not wired into the default run
+  path in v1** (see "What v1 verifies"); it ships dormant behind the dbt build verifier.
+- **`reference.ts`** — `ReferenceResolver`: produces the base↔head compiled-SQL pairs the
+  equivalence verifier needs (all git/dbt-compile/schema IO injected → unit-tested). Returns
+  `null` for greenfield (no base → build-fallback). Dormant by default alongside
+  `equivalence-verifier`: `verifyWorkspace` in `run.ts` wires the git+dbt-backed `Deps` only when
+  `ALTIMATE_ROUTER_EQUIVALENCE=1`, and that flag stays off pending broader warehouse-dialect
+  coverage in altimate-core (equivalence currently abstains on dialect functions like duckdb `STRFTIME`).
+- **`router.ts`** — `Router`: the escalation mechanism. `route({tiers, runAgent, verify})`
+  runs each tier, verifies, escalates on a failed verdict with the failing checks
+  (`escalationContext`), stops at the first pass. `shouldEscalate` is **decision-aware**:
+  it escalates on `FAILED`/`PROVEN_DIFFERENT` but **never on `UNDECIDABLE`** (a stronger
+  model can't make an undecidable query decidable). `DEFAULT_LADDER` is ordered
+  cheapest → strongest; override via `ALTIMATE_ROUTER_LADDER`.
+- **`policy.ts`** — `Policy`: where the ladder comes from. `STATIC` is the built-in
+  default; `altimate(key)` fetches a per-context ladder from the altimate API when
+  `ALTIMATE_API_KEY` is set (degrades to static on any failure); `resolve()` picks
+  between them; `reportOutcome()` posts verified outcomes back (key-gated, best-effort).
+  `sanitizeTiers` validates + caps any ladder from the API.
+- **`verdict.ts`** — `Verdict.Envelope` (schemaVersion 2): a machine-checkable record of the
+  result (accepted tier, `strength` + `decision`, per-attempt history, checks, evidence
+  hash, timestamp, optional signature).
+
+## What v1 verifies (read before enabling)
+v1 ships the **dbt build** verifier: a verdict is `OK` at **`BUILD`** strength when
+`dbt build` exits 0 with no errors. That proves the output **compiles and the project's
+own tests pass — it does NOT prove value-correctness.** The envelope is honest about this:
+the `strength` field says `BUILD`, not `EQUIVALENCE`. Treat the receipt as
+"build-verified", not "proven equivalent". The `EQUIVALENCE`-strength path
+(`equivalence-verifier.ts`) is gated on broader warehouse-dialect coverage in altimate-core
+(decidability) and lands in a later release.
+
+## When to enable
+Enable when the **tier-1 model is a strong cheap model** (the default `deepseek-v4-flash`
+benchmarks at parity with frontier on dbt tasks). With a strong tier-1, escalation fires
+rarely (only on a genuine build failure), so the router is economically favorable. With a
+*weak* tier-1, escalation fires constantly and can cost as much as just using the strong
+model — don't do that. The router is a **model-selection + verify** tool first, an
+escalation ladder second.
+
+## Default ladder rationale
+`deepseek-v4-flash → glm-5.1 → claude-opus-4.8`. Tier-1 is a validated strong-cheap model.
+Benchmarking (N=10 dbt tasks) found tier-2 (`glm-5.1`) quality-redundant with tier-1, but
+it is retained as a **failover / data-governance substitute** slot pending a larger powered
+tiering study; the final tier is a frontier model for genuine build failures. Override the
+whole ladder with `ALTIMATE_ROUTER_LADDER`.
+
+## Configuration
+- `ALTIMATE_ROUTER=1` — enable routing (default off).
+- `ALTIMATE_ROUTER_EQUIVALENCE=1` — experimental: try equivalence verification first, falling back to `dbt build` (default off).
+- `ALTIMATE_ROUTER_LADDER` — comma-separated `provider/model` ids to override the default ladder.
+- `ALTIMATE_API_KEY` / `ALTIMATE_API_URL` — use the altimate API for the routing policy and outcome reporting instead of the static ladder.
+
+## Integration
+`src/cli/cmd/run.ts` (`RunCommand`): when `Router.enabled()` and the cwd is a dbt project
+(`dbt_project.yml`), the run resolves a policy, runs each tier by re-invoking the existing run path
+with that model (escalation note prepended) in the same workspace, verifies with `dbt build` between
+tiers, and emits a verdict envelope. With the router off, or outside a dbt project, it stays a single run.
+
+## Tests
+- **Unit** — `test/router/{verifier,router,verdict,policy,verdict-strength,equivalence-verifier}.test.ts`.
+  Pure logic, incl. adversarial cases (dbt summary-line injection, ANSI/huge/multi-summary
+  output, endpoint response validation/capping), the tri-state strength/decision contract,
+  and the equivalence verifier's sound fallback (undecidable → build/test, never silent pass).
+- **E2E** (`test/router/*.e2e.test.ts`, env-gated — require docker + a dbt image +
+  network, excluded from default CI):
+  - `verifier.e2e` — real `dbt build` (pass / compile-error / failing-test) and that a
+    model emitting a fake summary does not change the verdict. `E2E_IMG=<image> bun test verifier.e2e`.
+  - `router.e2e` — real model calls + real dbt: cheap tier solves; an unsatisfiable
+    workspace escalates through tiers, caps, and threads failing-check context.
+    `OPENROUTER_API_KEY=… E2E_IMG=… bun test router.e2e`.
+  - `policy.e2e` — real network: live local server (incl. error/malformed/oversized
+    responses) and an unreachable endpoint, all degrade gracefully. `bun test policy.e2e`.
diff --git a/packages/opencode/src/router/equivalence-verifier.ts b/packages/opencode/src/router/equivalence-verifier.ts
new file mode 100644
index 000000000..bd854a580
--- /dev/null
+++ b/packages/opencode/src/router/equivalence-verifier.ts
@@ -0,0 +1,102 @@
+/**
+ * Equivalence-backed verifier (reference-available regime).
+ *
+ * For a change to an EXISTING model, the strongest deterministic signal is not
+ * "does it build" but "is the new SQL semantically equivalent to the prior version".
+ * This Impl resolves a reference (base) compiled SQL per touched model, compares it
+ * to the head compiled SQL via the altimate-core equivalence engine, and folds the
+ * per-model results into one Verdict (see `Verifier.fromEquivalence`).
+ *
+ * Soundness is preserved end-to-end: the engine never reports false-equivalence, and
+ * an undecidable result (validation errors / unsupported dialect) maps to UNDECIDABLE,
+ * which the router does NOT escalate on — the caller falls back to build/test. A
+ * stronger model cannot make an undecidable query decidable.
+ *
+ * Both the equivalence call and the reference resolution are injected, so this is
+ * fully unit-testable without the native engine, dbt, or git.
+ */
+import { Verifier } from "./verifier"
+
+export namespace EquivalenceVerifier {
+  /** Base→head compiled SQL for one model, plus whatever schema is needed to resolve refs. */
+  export interface Pair {
+    model: string
+    baseSql: string
+    headSql: string
+    /** Opaque schema handle passed through to the engine (e.g. altimate-core Schema). */
+    schema?: unknown
+  }
+
+  /**
+   * Supplies the comparison inputs for the models a change touched in a workspace.
+   *  - `null`: there is NO reference (greenfield) — the caller then uses the build/test
+   *    verifier instead.
+   *  - `[]`: a reference regime applies but no models were touched (nothing-to-verify).
+   */
+  export interface ReferenceResolver {
+    resolve(workdir: string): Promise<Pair[] | null>
+  }
+
+  /** The raw equivalence call (native `altimate_core.equivalence`), injected for testability. */
+  export type CheckEquivalence = (
+    headSql: string,
+    baseSql: string,
+    schema: unknown,
+  ) => Promise<Verifier.EquivalenceResult>
+
+  /**
+   * Build an Impl from injected parts: `check` runs one equivalence comparison, `resolver`
+   * yields the base/head pairs, and `fallback` (typically the dbt build verifier) decides
+   * whenever there is no reference, the resolver throws, or the engine is undecidable —
+   * an abstain is never accepted as a pass on its own.
+   */
+  export function create(
+    check: CheckEquivalence,
+    resolver: ReferenceResolver,
+    fallback: Verifier.Impl,
+  ): Verifier.Impl {
+    // One comparison. A throwing engine call is undecidable for that model, not "different".
+    const compare = async (pair: Pair): Promise<Verifier.EquivalenceResult> => {
+      try {
+        return await check(pair.headSql, pair.baseSql, pair.schema)
+      } catch (err) {
+        return { equivalent: false, validation_errors: [`equivalence error: ${String(err)}`] }
+      }
+    }
+
+    const verify = async (workdir: string): Promise<Verifier.Verdict> => {
+      // A resolver failure is handled like greenfield: degrade to the build/test verifier.
+      let pairs: Pair[] | null
+      try {
+        pairs = await resolver.resolve(workdir)
+      } catch {
+        pairs = null
+      }
+      if (pairs === null) return fallback.verify(workdir)
+
+      // Compare pair by pair, in resolver order; each model contributes exactly one result.
+      const results: { model: string; result: Verifier.EquivalenceResult }[] = []
+      for (const pair of pairs) results.push({ model: pair.model, result: await compare(pair) })
+
+      const verdict = Verifier.fromEquivalence(results)
+      if (verdict.decision !== Verifier.Decision.UNDECIDABLE) return verdict
+
+      // Undecidable → defer to the reference-free gate (build/test). The DECISION is taken
+      // from the fallback instead of being stamped UNDECIDABLE: a failing build must surface
+      // as FAILED so the router escalates (UNDECIDABLE never escalates), and a passing one is
+      // accepted with the fallback's own strength (strength<EQUIVALENCE), the "equivalence
+      // couldn't decide" fact being carried by the reason — which keeps the ok⟺OK
+      // invariant intact.
+      const fb = await fallback.verify(workdir)
+      const outcome = fb.ok ? Verifier.Decision.OK : Verifier.Decision.FAILED
+      const detail = fb.reason ?? (fb.ok ? "passed" : "failed")
+      return {
+        ...fb,
+        decision: outcome,
+        reason: `equivalence undecidable; fell back to build/test (${detail})`,
+        checks: [...verdict.checks, ...fb.checks],
+      }
+    }
+    return { verify }
+  }
+}
diff --git a/packages/opencode/src/router/policy.ts b/packages/opencode/src/router/policy.ts
new file mode 100644
index 000000000..ae6eb746e
--- /dev/null
+++ b/packages/opencode/src/router/policy.ts
@@ -0,0 +1,131 @@
+/**
+ * Routing policy — the decision of WHAT to route to, kept separate from the
+ * mechanism that executes it.
+ *
+ * Two sources:
+ *  - `STATIC`: the built-in default ladder, always available.
+ *  - `altimate(key)`: when an altimate API key is configured, the routing policy is
+ *    fetched per-context from the altimate API and used instead, and verified
+ *    outcomes are reported back so the policy can be tuned over time.
+ *
+ * The client executes whatever policy it is handed. The SaaS policy activates only
+ * when `ALTIMATE_API_KEY` is present, otherwise the static ladder is used.
+ * Network/transport failures degrade to STATIC.
+ */
+import { Router } from "./router"
+import type { Verdict } from "./verdict"
+
+export namespace Policy {
+  /** Signals available for routing decisions (extended over time). */
+  export interface RoutingContext {
+    prompt?: string
+    projectType?: string
+    taskId?: string
+  }
+
+  export interface RoutingPolicy {
+    source: "static" | "altimate"
+    tiers(ctx: RoutingContext): Promise<Router.Tier[]>
+  }
+
+  type Fetch = typeof globalThis.fetch
+
+  /** Defensive cap: a bad/compromised policy endpoint must not inject a cost-bomb ladder. */
+  export const MAX_TIERS = 8
+
+  /** A model id must look like `provider/model[/...]` — plain chars only, no whitespace/control. */
+  const MODEL_RE = /^[A-Za-z0-9._-]+(?:\/[A-Za-z0-9._-]+)+$/
+
+  /**
+   * Validate + cap a ladder returned by the policy endpoint. Keeps only entries whose
+   * trimmed `model` is a string of at most 200 chars matching MODEL_RE, derives a label
+   * (last path segment) when missing, caps to MAX_TIERS. Returns null if nothing usable.
+   */
+  export function sanitizeTiers(raw: unknown): Router.Tier[] | null {
+    if (!Array.isArray(raw)) return null
+    const out: Router.Tier[] = []
+    for (const t of raw) {
+      const { model, label: given } = (t ?? {}) as { model?: unknown; label?: unknown }
+      if (typeof model !== "string") continue
+      const m = model.trim()
+      if (!m || m.length > 200 || !MODEL_RE.test(m)) continue
+      const rawLabel = typeof given === "string" && given ? given : m.split("/").pop() || m
+      // Keep printable ASCII only (drops control bytes such as ESC) — the label is printed to the terminal.
+      const label = rawLabel.replace(/[^\x20-\x7E]/g, "").slice(0, 100) || m
+      out.push({ model: m, label })
+      if (out.length >= MAX_TIERS) break
+    }
+    return out.length ? out : null
+  }
+
+  /** The configured altimate API key (`ALTIMATE_API_KEY`), or undefined when unset/empty. */
+  export function apiKey(): string | undefined {
+    return process.env["ALTIMATE_API_KEY"] || undefined
+  }
+
+  /** API base URL (`ALTIMATE_API_URL` or the default), without trailing slashes so `${base}/v1/...` is well-formed. */
+  export function baseUrl(): string {
+    return (process.env["ALTIMATE_API_URL"] || "https://api.altimate.ai").replace(/\/+$/, "")
+  }
+
+  /** Built-in default ladder (env-overridable via ALTIMATE_ROUTER_LADDER). */
+  export const STATIC: RoutingPolicy = {
+    source: "static",
+    tiers: async () => Router.ladder(),
+  }
+
+  /**
+   * Customer routing policy served by the altimate API. Resolves the per-context
+   * ladder for this account; degrades to the static ladder if the service is
+   * unreachable or returns nothing usable.
+   */
+  export function altimate(key: string, base: string = baseUrl(), fetchImpl: Fetch = fetch): RoutingPolicy {
+    return {
+      source: "altimate",
+      async tiers(ctx: RoutingContext): Promise<Router.Tier[]> {
+        try {
+          const res = await fetchImpl(`${base}/v1/router/policy`, {
+            method: "POST",
+            headers: { "Content-Type": "application/json", Authorization: `Bearer ${key}` },
+            body: JSON.stringify(ctx),
+            signal: AbortSignal.timeout(3000),
+          })
+          if (!res.ok) return Router.ladder()
+          const data = (await res.json()) as { tiers?: unknown }
+          return sanitizeTiers(data?.tiers) ?? Router.ladder()
+        } catch {
+          return Router.ladder()
+        }
+      },
+    }
+  }
+
+  /** The active policy: customer policy when an altimate key is set, else the static ladder. */
+  export function resolve(fetchImpl: Fetch = fetch): RoutingPolicy {
+    const key = apiKey()
+    return key ? altimate(key, baseUrl(), fetchImpl) : STATIC
+  }
+
+  /**
+   * Report a verified outcome back to the altimate service so the customer's policy
+   * improves. Best-effort and key-gated — a no-op without a key, and never throws.
+   */
+  export async function reportOutcome(
+    envelope: Verdict.Envelope,
+    base: string = baseUrl(),
+    fetchImpl: Fetch = fetch,
+  ): Promise<void> {
+    const key = apiKey()
+    if (!key) return
+    try {
+      await fetchImpl(`${base}/v1/router/outcomes`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json", Authorization: `Bearer ${key}` },
+        body: JSON.stringify(envelope),
+        signal: AbortSignal.timeout(3000),
+      })
+    } catch {
+      /* best-effort: outcome reporting must never break the run */
+    }
+  }
+}
diff --git a/packages/opencode/src/router/reference.ts b/packages/opencode/src/router/reference.ts
new file mode 100644
index 000000000..03058ac3b
--- /dev/null
+++ b/packages/opencode/src/router/reference.ts
@@ -0,0 +1,118 @@
+/**
+ * Reference resolver for the equivalence verifier (reference-available regime).
+ *
+ * To compare a changed dbt model against its prior version, we need the COMPILED SQL
+ * of both sides (equivalence runs on compiled SQL, never raw Jinja) plus the schema to
+ * resolve table/column refs. This module produces `EquivalenceVerifier.Pair[]` for the
+ * models a change touched, or `null` when there is no reference (greenfield — the caller
+ * then uses the build/test verifier).
+ *
+ * All IO (git, dbt compile, schema) is injected via `Deps`, so the orchestration is
+ * fully unit-testable without git/dbt. A git+dbt-backed `Deps` is the production impl.
+ */
+import { EquivalenceVerifier } from "./equivalence-verifier"
+
+export namespace ReferenceResolver {
+  /** "WORKING" = the current working tree; otherwise a git ref (the base/PR-target). */
+  export type Ref = string
+
+  export interface Deps {
+    /** The base ref to diff against (PR merge-base or HEAD~), or null when none exists (greenfield). */
+    baseRef(workdir: string): Promise<string | null>
+    /** Model names whose .sql changed vs the base. */
+    changedModels(workdir: string, base: Ref): Promise<string[]>
+    /** model -> compiled SQL at a given ref ("WORKING" or a git ref). */
+    compiledSql(workdir: string, ref: Ref): Promise<Map<string, string>>
+    /** Opaque schema handle passed to the equivalence engine (e.g. altimate-core Schema). */
+    schema(workdir: string): Promise<unknown>
+  }
+
+  /** Run a shell command; returns stdout + exit code. Injected so `gitDbtDeps` is testable. */
+  export type Exec = (cmd: string, args: string[], cwd: string) => Promise<{ stdout: string; code: number }>
+
+  export interface GitDbtOptions {
+    /** dbt binary (e.g. "dbt" or "altimate-dbt"). */
+    dbt?: string
+    /** Read compiled model SQL after a `dbt compile` in `dir` → Map<model, sql>. */
+    readCompiled: (dir: string) => Promise<Map<string, string>>
+    /** Build the engine schema for the project (best-effort; empty Schema ⇒ engine abstains → build-fallback). */
+    buildSchema: (workdir: string) => Promise<unknown>
+    /** Make an isolated checkout of `ref` for base-side compilation (e.g. git worktree); returns its path + a cleanup. */
+    checkoutBase: (workdir: string, ref: string) => Promise<{ dir: string; cleanup: () => Promise<void> }>
+  }
+
+  /**
+   * Production `Deps`: git for base/changed detection, dbt to compile each side, an injected
+   * schema builder. All process IO goes through `exec`/`opts` (unit-testable without git/dbt).
+   * `compiledSql` rejects when `dbt compile` exits non-zero, so artifacts left behind by an
+   * earlier compile are never read back as current SQL. NOTE: the live path (git-worktree
+   * base compile + warehouse schema) is pending E2E validation and ships behind a flag.
+   */
+  export function gitDbtDeps(exec: Exec, opts: GitDbtOptions): Deps {
+    const dbt = opts.dbt ?? "dbt"
+    const compile = async (cwd: string): Promise<void> => {
+      if ((await exec(dbt, ["compile"], cwd)).code !== 0) throw new Error(`${dbt} compile failed in ${cwd}`)
+    }
+    return {
+      async baseRef(workdir) {
+        const r = await exec("git", ["rev-parse", "--verify", "HEAD"], workdir)
+        return r.code === 0 && r.stdout.trim() ? r.stdout.trim() : null // no commits ⇒ greenfield
+      },
+      async changedModels(workdir, base) {
+        const r = await exec("git", ["diff", "--name-only", base, "--", "models"], workdir)
+        if (r.code !== 0) return []
+        const sqlPaths = r.stdout.split("\n").map((l) => l.trim()).filter((l) => l.endsWith(".sql"))
+        return sqlPaths.map((l) => l.split("/").pop()!.replace(/\.sql$/, ""))
+      },
+      async compiledSql(workdir, ref) {
+        if (ref === "WORKING") {
+          await compile(workdir)
+          return opts.readCompiled(workdir)
+        }
+        const base = await opts.checkoutBase(workdir, ref)
+        try {
+          await exec(dbt, ["deps"], base.dir) // result unchecked: the compile below is the gate
+          await compile(base.dir)
+          return await opts.readCompiled(base.dir)
+        } finally {
+          await base.cleanup()
+        }
+      },
+      schema: (workdir) => opts.buildSchema(workdir),
+    }
+  }
+
+  /**
+   * Wrap injected deps as an `EquivalenceVerifier.ReferenceResolver`.
+   *
+   * `resolve` yields null when `baseRef` finds no base (greenfield → build fallback), [] when
+   * nothing changed against the base, and otherwise one Pair for each changed model that has
+   * compiled SQL on both the head and base side (a model without base SQL, e.g. new on head,
+   * is skipped).
+   */
+  export function create(deps: Deps): EquivalenceVerifier.ReferenceResolver {
+    const resolve = async (workdir: string): Promise<EquivalenceVerifier.Pair[] | null> => {
+      const baseRef = await deps.baseRef(workdir)
+      if (baseRef === null) return null // greenfield — no reference
+
+      const models = await deps.changedModels(workdir, baseRef)
+      if (models.length === 0) return []
+
+      // Head compile, base compile and schema build are started together.
+      const [headByModel, baseByModel, schema] = await Promise.all([
+        deps.compiledSql(workdir, "WORKING"),
+        deps.compiledSql(workdir, baseRef),
+        deps.schema(workdir),
+      ])
+
+      // Keep a model only when both sides produced non-empty SQL; anything else is left
+      // to the build/test gate.
+      return models.flatMap((model): EquivalenceVerifier.Pair[] => {
+        const headSql = headByModel.get(model)
+        const baseSql = baseByModel.get(model)
+        return headSql && baseSql ? [{ model, baseSql, headSql, schema }] : []
+      })
+    }
+    return { resolve }
+  }
+}
diff --git a/packages/opencode/src/router/router.ts b/packages/opencode/src/router/router.ts
new file mode 100644
index 000000000..faecb6c37
--- /dev/null
+++ b/packages/opencode/src/router/router.ts
@@ -0,0 +1,130 @@
+/**
+ * Verifier-gated model router — the escalation ladder.
+ *
+ * Run the CHEAP tier first; verify the workspace deterministically (Verifier);
+ * if the verdict is not ok, escalate to the next stronger tier, handing it the
+ * exact failing checks so it fixes rather than restarts blind. Stop at the first
+ * passing verdict (or the top of the ladder).
+ *
+ * Because the cheap tier handles most tasks, escalation is rare. The default ladder
+ * is ordered cheapest → strongest and can be overridden per deployment.
+ *
+ * Pure orchestration: `runAgent` + `verify` are injected → unit-testable without
+ * a live model or dbt. Flag-gated (`ALTIMATE_ROUTER`); default off.
+ */
+import { Verifier } from "./verifier"
+
+export namespace Router {
+  export interface Tier {
+    model: string
+    label: string
+  }
+
+  /**
+   * Default ladder, ordered cheapest → strongest. A tier is only reached when the
+   * previous tier's output fails verification, so most runs complete at the cheap tier.
+   * Override per deployment via `ALTIMATE_ROUTER_LADDER` or an injected policy.
+   */
+  export const DEFAULT_LADDER: Tier[] = [
+    { model: "openrouter/deepseek/deepseek-v4-flash", label: "deepseek-v4-flash" },
+    { model: "openrouter/z-ai/glm-5.1", label: "glm-5.1" },
+    { model: "openrouter/anthropic/claude-opus-4.8", label: "claude-opus-4.8" },
+  ]
+
+  export function enabled(): boolean {
+    return process.env["ALTIMATE_ROUTER"] === "1"
+  }
+
+  /**
+   * Tiers from `ALTIMATE_ROUTER_LADDER` (comma-separated provider/model ids), else DEFAULT_LADDER.
+   */
+  export function ladder(): Tier[] {
+    const parsed: Tier[] = []
+    for (const raw of (process.env["ALTIMATE_ROUTER_LADDER"] ?? "").split(",")) {
+      const model = raw.trim()
+      if (model) parsed.push({ model, label: model.split("/").pop() || model })
+    }
+    return parsed.length > 0 ? parsed : DEFAULT_LADDER
+  }
+
+  /**
+   * Decide whether to hand the task to the next tier.
+   *
+   * Never escalates from the last tier. Otherwise escalates on a FAILED or PROVEN_DIFFERENT
+   * decision and not on any other decision — in particular not on UNDECIDABLE, since a
+   * stronger model does not make an undecidable query decidable and escalating on
+   * uncertainty is the cost-blowup failure mode. A verdict with no `decision` uses the
+   * legacy rule: escalate iff it is not `ok` (back-compat).
+   */
+  export function shouldEscalate(verdict: Verifier.Verdict, tierIndex: number, tiers: Tier[]): boolean {
+    if (tiers.length - 1 <= tierIndex) return false // already at the top of the ladder
+    switch (verdict.decision) {
+      case undefined: return !verdict.ok
+      case Verifier.Decision.FAILED: case Verifier.Decision.PROVEN_DIFFERENT: return true
+      default: return false
+    }
+  }
+
+  /** Compose the escalation note for the next tier: who failed, why, and which checks to fix. */
+  export function escalationContext(prev: Tier, verdict: Verifier.Verdict): string {
+    const note = [`A previous attempt (by ${prev.label}) did not pass verification.`]
+    // The reason and failing-check lines are optional; they appear only when there is content.
+    if (verdict.reason) note.push(`Verifier reason: ${verdict.reason}`)
+    const failedNames: string[] = []
+    for (const check of verdict.checks) if (!check.ok) failedNames.push(check.name)
+    if (failedNames.length) note.push(`Failing checks to fix: ${failedNames.join(", ")}.`)
+    note.push(`The prior changes are in the workspace — fix these specific failures; do not start over.`)
+    return note.join("\n")
+  }
+
+  export interface Attempt {
+    tier: Tier
+    verdict: Verifier.Verdict
+  }
+
+  export interface RouteResult {
+    solved: boolean
+    solvedBy?: Tier
+    attempts: Attempt[]
+  }
+
+  /**
+   * Walk the ladder from the first tier upward: run the agent, verify, and either accept
+   * the first ok verdict or pass an escalation note to the next tier. `runAgent(model,
+   * escalationNote?)` works in the shared workspace; `verify()` judges it afterwards.
+   */
+  export async function route(params: {
+    tiers?: Tier[]
+    runAgent: (model: string, escalationNote?: string) => Promise<void>
+    verify: () => Promise<Verifier.Verdict>
+  }): Promise<RouteResult> {
+    const tiers = params.tiers ?? ladder()
+    const attempts: Attempt[] = []
+    // Run one tier end to end. A throw from the agent or the verifier becomes a FAILED
+    // verdict at UNVERIFIABLE strength (no gate judged the output), so a transient error
+    // in one tier escalates instead of aborting the whole run.
+    const attempt = async (tier: Tier, note?: string): Promise<Verifier.Verdict> => {
+      try {
+        await params.runAgent(tier.model, note)
+        return await params.verify()
+      } catch (e) {
+        return {
+          ok: false,
+          strength: Verifier.Strength.UNVERIFIABLE,
+          decision: Verifier.Decision.FAILED,
+          reason: `tier error: ${String(e)}`,
+          checks: [],
+        }
+      }
+    }
+    let note: string | undefined
+    for (const [index, tier] of tiers.entries()) {
+      const verdict = await attempt(tier, note)
+      attempts.push({ tier, verdict })
+      if (verdict.ok) return { solved: true, solvedBy: tier, attempts }
+      if (!shouldEscalate(verdict, index, tiers)) break
+      note = escalationContext(tier, verdict)
+    }
+    return { solved: false, attempts }
+  }
+}
diff --git a/packages/opencode/src/router/verdict.ts b/packages/opencode/src/router/verdict.ts
new file mode 100644
index 000000000..f0a144b97
--- /dev/null
+++ b/packages/opencode/src/router/verdict.ts
@@ -0,0 +1,96 @@
+/**
+ * Verdict envelope — a machine-checkable record of a routed result.
+ *
+ * Records which tier produced the accepted result, the checks that passed, an
+ * evidence fingerprint, and an optional signature — a structured summary of "this
+ * output passed deterministic verification by tier X" for downstream/audit use.
+ *
+ * Pure + dependency-free: the timestamp and signer are injected so this never
+ * reaches for Date.now / crypto itself.
+ */
+import type { Verifier } from "./verifier"
+import type { Router } from "./router"
+
+export namespace Verdict {
+  export interface AttemptRecord {
+    model: string
+    ok: boolean
+    /** Gate conclusion for this attempt (ok / proven_different / undecidable / failed). */
+    decision?: Verifier.Decision
+    /** Evidence strength for this attempt (unverifiable / build / dbt_test / equivalence). */
+    strength?: Verifier.Strength
+    reason?: string
+    failing: string[]
+  }
+
+  export interface Envelope {
+    /** Envelope schema version, for forward-compat as the shape evolves. */
+    schemaVersion: string
+    solved: boolean
+    solvedBy: string | null
+    /** ladder index that produced the passing verdict, or null if unsolved. */
+    tier: number | null
+    /** true when the accepted result could not actually be verified (fail-open). */
+    unverifiable: boolean
+    /**
+     * Evidence strength of the accepted result — the core trust signal. EQUIVALENCE
+     * means proven equivalent to a reference; BUILD means it merely compiled.
+     */
+    strength?: Verifier.Strength
+    /** Gate conclusion of the accepted result. */
+    decision?: Verifier.Decision
+    attempts: AttemptRecord[]
+    checks: Verifier.Check[]
+    evidenceHash: string
+    createdAt: string
+    signature?: string
+  }
+
+  /** Envelope schema. v2 adds per-result `strength` + `decision` (the trust signal). */
+  export const SCHEMA_VERSION = "2"
+
+  /** Non-cryptographic evidence fingerprint: 32-bit djb2 (xor form) over UTF-16 code units. Not a signature. */
+  export function evidenceHash(s: string): string {
+    let hash = 5381
+    for (let pos = 0; pos < s.length; pos += 1) hash = (Math.imul(hash, 33) ^ s.charCodeAt(pos)) >>> 0
+    return `djb2:${hash.toString(16).padStart(8, "0")}`
+  }
+
+  /**
+   * Assemble an envelope from a routing result. The headline fields (strength, decision,
+   * checks, evidence hash) come from the LAST attempt. `now` (ISO string) and the optional
+   * `sign` function are injected; `signature` is attached only when `sign` returns a truthy value.
+   */
+  export function build(
+    result: Router.RouteResult,
+    opts: { now: string; sign?: (unsigned: Omit<Envelope, "signature">) => string },
+  ): Envelope {
+    const toRecord = (a: Router.Attempt): AttemptRecord => {
+      const failing: string[] = []
+      for (const c of a.verdict.checks) if (!c.ok) failing.push(c.name)
+      const { ok, decision, strength, reason } = a.verdict
+      return { model: a.tier.label, ok, decision, strength, reason, failing }
+    }
+    const lastVerdict = result.attempts.at(-1)?.verdict
+    const unsigned: Omit<Envelope, "signature"> = {
+      schemaVersion: SCHEMA_VERSION,
+      solved: result.solved,
+      solvedBy: result.solvedBy?.label ?? null,
+      tier: result.solved ? result.attempts.length - 1 : null,
+      unverifiable: result.solved ? !!lastVerdict?.unverifiable : false,
+      strength: lastVerdict?.strength,
+      decision: lastVerdict?.decision,
+      attempts: result.attempts.map(toRecord),
+      checks: lastVerdict?.checks ?? [],
+      evidenceHash: evidenceHash(lastVerdict?.evidence ?? ""),
+      createdAt: opts.now,
+    }
+    const signature = opts.sign?.(unsigned)
+    if (!signature) return unsigned
+    return { ...unsigned, signature }
+  }
+
+  export function serialize(e: Envelope): string {
+    return JSON.stringify(e)
+  }
+}
diff --git a/packages/opencode/src/router/verifier.ts b/packages/opencode/src/router/verifier.ts
new file mode 100644
index 000000000..7f6311b12
--- /dev/null
+++ b/packages/opencode/src/router/verifier.ts
@@ -0,0 +1,289 @@
+/**
+ * Deterministic verifier for the verifier-gated router.
+ *
+ * After an agent run completes, a verifier inspects the resulting workspace and
+ * returns a Verdict: did the work actually succeed? For dbt/SQL this is checkable,
+ * not estimated — `dbt build` exits 0 and `dbt test` passes. A not-ok verdict means
+ * the attempt is wrong, so the router escalates to a stronger model.
+ *
+ * The default `dbtVerifier` runs `dbt build`; a different verifier can be injected
+ * via the `Impl` interface (e.g. a semantic-equivalence check).
+ *
+ * Pure parsing + an injected command runner → fully testable without dbt.
+ */
+
+export namespace Verifier {
+  /** One graded check (a dbt test, a model build, or an equivalence assertion). */
+  export interface Check {
+    name: string
+    ok: boolean
+    detail?: string
+  }
+
+  /**
+   * How strong is the evidence behind a verdict? Ordered weakest → strongest.
+   * The signed envelope carries this so a consumer knows whether a result was
+   * merely build-verified (value unknown) or proven equivalent to a reference.
+   */
+  export enum Strength {
+    /** No gate could run (fail-open). The result is NOT proven. */
+    UNVERIFIABLE = "unverifiable",
+    /** `dbt build` exited 0 with no errors: it compiles, but value-correctness is unknown. */
+    BUILD = "build",
+    /** dbt schema/unit tests passed: asserted invariants hold (still not full correctness). */
+    DBT_TEST = "dbt_test",
+    /** Proven semantically equivalent to a reference by the equivalence engine. */
+    EQUIVALENCE = "equivalence",
+  }
+
+  /**
+   * What did the gate conclude? Distinct from {@link Strength} (how it was judged).
+   * `UNDECIDABLE` is the equivalence engine's honest abstain — it must NEVER be
+   * silently treated as a pass, and must NOT trigger escalation (a stronger model
+   * does not make an undecidable query decidable); the caller falls back + flags.
+   */
+  export enum Decision {
+    OK = "ok",
+    /** The equivalence engine found a MATERIAL difference vs the reference. */
+    PROVEN_DIFFERENT = "proven_different",
+    /** The engine could not decide (validation errors / unsupported syntax / no reference). */
+    UNDECIDABLE = "undecidable",
+    /** A build/test gate failed. */
+    FAILED = "failed",
+  }
+
+  export interface Verdict {
+    ok: boolean
+    /**
+     * True when verification could not actually run (e.g. no dbt project, dbt binary
+     * missing). Distinct from a genuine pass: `ok` is true so the run is not blocked
+     * (fail-open), but the result was NOT proven — consumers/the envelope can tell.
+     */
+    unverifiable?: boolean
+    /**
+     * Evidence strength (optional for back-compat; populated by every constructor).
+     * Lets the signed envelope say "verified at strength EQUIVALENCE" vs "BUILD only".
+     */
+    strength?: Strength
+    /**
+     * Gate conclusion (optional for back-compat; populated by every constructor).
+     * Drives decision-aware escalation in the router.
+     */
+    decision?: Decision
+    /** Engine confidence in [0,1] when available (equivalence). Never 1.0 — soundness margin. */
+    confidence?: number
+    /** Human/agent-readable reason when not ok (fed to the next tier on escalation). */
+    reason?: string
+    checks: Check[]
+    /** Raw evidence excerpt (for the verdict envelope / audit). */
+    evidence?: string
+  }
+
+  /** One model's equivalence result (subset of altimate-core's EquivalenceResult). */
+  export interface EquivalenceResult {
+    equivalent: boolean
+    /** Non-empty ⇒ the engine could not decide (undecidable), NOT "different". */
+    validation_errors?: string[]
+    /** Material differences when decidably non-equivalent. */
+    differences?: { severity?: string; description?: string }[]
+    confidence?: number
+  }
+
+  /** dbt's "Done. PASS=.. WARN=.. ERROR=.. SKIP=.. TOTAL=.." summary. */
+  export interface DbtSummary {
+    pass: number
+    warn: number
+    error: number
+    skip: number
+    total: number
+  }
+
+  /** Result of running a verification command (injected; real impl shells out). */
+  export interface RunResult {
+    output: string
+    exitCode: number
+  }
+
+  /** Pluggable judgment. Default = dbtVerifier; a custom verifier can be injected. */
+  export interface Impl {
+    verify(workdir: string): Verdict | Promise<Verdict>
+  }
+
+  /**
+   * Read dbt's "PASS=.. WARN=.. ERROR=.. SKIP=.. [NO-OP=..] TOTAL=.." counters out of
+   * `output`; null when no such line exists (the build never finished).
+   *
+   * Hardening: the LAST match wins. dbt prints its real summary last, whereas a fake
+   * "Done. PASS=99 ERROR=0" smuggled in through model SQL would be echoed earlier in the
+   * error log. (The exit-code check in `fromDbt` is the primary backstop; last-match is
+   * defense in depth.)
+   */
+  export function parseDbtSummary(output: string): DbtSummary | null {
+    const pattern = /PASS=(\d+)\s+WARN=(\d+)\s+ERROR=(\d+)\s+SKIP=(\d+)(?:\s+NO-OP=\d+)?\s+TOTAL=(\d+)/gi
+    const finalMatch = Array.from(output.matchAll(pattern)).pop()
+    if (!finalMatch) return null
+    const count = (group: number): number => Number(finalMatch[group])
+    return { pass: count(1), warn: count(2), error: count(3), skip: count(4), total: count(5) }
+  }
+
+  /**
+   * List the dbt nodes reported as failing in `output` — the actionable detail handed to
+   * the next tier. Recognizes dbt's standard phrasings, e.g.
+   *   "Failure in test not_null_orders_id (models/schema.yml)"
+   *   "Error in model my_model (models/my_model.sql)"
+   *   "Compilation Error in model stg_x (...)"
+   * Each (node type, node name) pair is reported once, in order of first appearance.
+   */
+  export function failingNodes(output: string): Check[] {
+    const pattern = /(?:Compilation Error|Failure|Error|Runtime Error) in (test|model|seed|snapshot|unit_test) ([\w.]+)/gi
+    // Keyed by "type:name" so a test and a model that share a name are both kept.
+    const firstByKey = new Map<string, Check>()
+    for (const [phrase, nodeType, nodeName] of output.matchAll(pattern)) {
+      const key = `${nodeType}:${nodeName}`
+      if (firstByKey.has(key)) continue
+      firstByKey.set(key, { name: nodeName, ok: false, detail: phrase })
+    }
+    // Map iteration follows insertion order, i.e. first appearance in the log.
+    return [...firstByKey.values()]
+  }
+
+  /**
+   * Turn a finished `dbt build`/`dbt test` run into a Verdict at BUILD strength.
+   * ok ⇔ exit code 0 AND a run summary was found AND that summary reports zero ERRORs;
+   * a missing summary (build crashed / never ran) is never ok.
+   */
+  export function fromDbt(output: string, exitCode: number): Verdict {
+    const summary = parseDbtSummary(output)
+    const failing = failingNodes(output)
+    const ok = exitCode === 0 && summary !== null && summary.error === 0
+
+    // Explain the failure, most specific cause first.
+    const explain = (): string | undefined => {
+      if (ok) return undefined
+      if (summary === null) return "dbt build did not complete (no run summary found)"
+      if (summary.error > 0) {
+        const names = failing.length ? ` — failing: ${failing.map((f) => f.name).join(", ")}` : ""
+        return `${summary.error} dbt error(s); ${summary.pass}/${summary.total} passed` + names
+      }
+      return exitCode !== 0 ? `dbt exited ${exitCode}` : undefined
+    }
+
+    // Prefer the named failing nodes; otherwise record a single synthetic "dbt build" check.
+    let checks: Check[] = failing
+    if (failing.length === 0) {
+      checks = summary
+        ? [{ name: "dbt build", ok, detail: `PASS=${summary.pass} ERROR=${summary.error} TOTAL=${summary.total}` }]
+        : [{ name: "dbt build", ok: false, detail: "no summary" }]
+    }
+    const decision = ok ? Decision.OK : Decision.FAILED
+    return { ok, strength: Strength.BUILD, decision, reason: explain(), checks, evidence: output.slice(-800) }
+  }
+
+  /**
+   * Build a Verdict from per-model equivalence results (reference-available regime).
+   *
+   * Folds N model verdicts into one, honoring the engine's soundness. Precedence:
+   *  - no results at all ⇒ UNDECIDABLE at UNVERIFIABLE strength (nothing was compared);
+   *  - any model decidably non-equivalent ⇒ PROVEN_DIFFERENT (escalation-worthy), even if
+   *    another model was undecidable — a proven material diff is actionable;
+   *  - else any model with `validation_errors` ⇒ UNDECIDABLE (the caller MUST fall back to
+   *    build/test, never pass silently);
+   *  - else (every model proven equivalent) ⇒ OK at EQUIVALENCE strength.
+   *
+   * `ok` is true only for the all-equivalent case. The PROVEN_DIFFERENT `reason` names only
+   * the models proven different; undecidable models stay visible in `checks` with an
+   * "undecidable: …" detail. `confidence` is the minimum reported by any model, or
+   * undefined when none reported one.
+   */
+  export function fromEquivalence(results: { model: string; result: EquivalenceResult }[]): Verdict {
+    if (results.length === 0) {
+      return {
+        ok: false,
+        strength: Strength.UNVERIFIABLE,
+        decision: Decision.UNDECIDABLE,
+        reason: "no models to compare (no reference resolved)",
+        checks: [],
+      }
+    }
+    const checks: Check[] = []
+    const different: string[] = []
+    let anyUndecidable = false
+    // Track confidence only when a model actually reports it — never synthesize a
+    // 1.0 default (that would read as "100% confident" on a non-OK verdict).
+    let minConfidence: number | undefined
+    for (const { model, result } of results) {
+      const errors = result.validation_errors ?? []
+      if (typeof result.confidence === "number")
+        minConfidence = minConfidence === undefined ? result.confidence : Math.min(minConfidence, result.confidence)
+      if (errors.length > 0) {
+        // Validation errors mean the engine abstained, NOT that the SQL differs.
+        anyUndecidable = true
+        checks.push({ name: model, ok: false, detail: `undecidable: ${errors.join("; ")}` })
+      } else if (!result.equivalent) {
+        different.push(model)
+        const diff = (result.differences ?? []).map((d) => d.description ?? d.severity ?? "diff").join("; ")
+        checks.push({ name: model, ok: false, detail: `not equivalent: ${diff || "material difference"}` })
+      } else {
+        checks.push({ name: model, ok: true, detail: "equivalent" })
+      }
+    }
+    // PROVEN_DIFFERENT outranks UNDECIDABLE: a proven material diff is actionable (escalate),
+    // even if another model in the change was undecidable.
+    if (different.length > 0) {
+      return {
+        ok: false,
+        strength: Strength.EQUIVALENCE,
+        decision: Decision.PROVEN_DIFFERENT,
+        confidence: minConfidence,
+        // Only the proven-different models: listing undecidable ones here would tell the
+        // next tier to "fix" SQL the engine never judged.
+        reason: `not equivalent to reference: ${different.join(", ")}`,
+        checks,
+      }
+    }
+    if (anyUndecidable) {
+      return {
+        ok: false,
+        strength: Strength.BUILD, // equivalence couldn't decide; caller falls back to build/test
+        decision: Decision.UNDECIDABLE,
+        reason: "equivalence undecidable for some models — falling back to build/test",
+        checks,
+      }
+    }
+    // Every model was proven equivalent.
+    return { ok: true, strength: Strength.EQUIVALENCE, decision: Decision.OK, confidence: minConfidence, checks }
+  }
+
+  /** Default that passes everything (ungated) — used when no real verifier is configured. */
+  export const ALLOW_ALL: Impl = {
+    verify: () => ({ ok: true, strength: Strength.UNVERIFIABLE, decision: Decision.OK, unverifiable: true, checks: [] }),
+  }
+
+  /**
+   * Default deterministic verifier: runs `dbt build` in the workspace through the injected
+   * `run` and judges the result with `fromDbt`. If `run` (or the judging) throws, `verify`
+   * still resolves — fail-open, but flagged `unverifiable` so it is not mistaken for a
+   * real pass.
+   */
+  export function dbtVerifier(run: (cmd: string, workdir: string) => Promise<RunResult>): Impl {
+    // Soft verdict for a verifier that could not run: accepted, but proven nothing.
+    const failOpen = (cause: unknown): Verdict => ({
+      ok: true,
+      unverifiable: true,
+      strength: Strength.UNVERIFIABLE,
+      decision: Decision.UNDECIDABLE,
+      reason: `verifier error: ${String(cause)}`,
+      checks: [],
+      evidence: "verifier-error",
+    })
+    const verify = async (workdir: string): Promise<Verdict> => {
+      try {
+        const { output, exitCode } = await run("dbt build", workdir)
+        return fromDbt(output, exitCode)
+      } catch (e) {
+        return failOpen(e)
+      }
+    }
+    return { verify }
+  }
+}
diff --git a/packages/opencode/test/router/equivalence-verifier.test.ts b/packages/opencode/test/router/equivalence-verifier.test.ts
new file mode 100644
index 000000000..9ea1b925e
--- /dev/null
+++ b/packages/opencode/test/router/equivalence-verifier.test.ts
@@ -0,0 +1,108 @@
+import { describe, expect, test } from "bun:test"
+import { Verifier } from "../../src/router/verifier"
+import { EquivalenceVerifier } from "../../src/router/equivalence-verifier"
+
+const { Decision, Strength } = Verifier
+
+// Fallback verifier double: always answers with `verdict` and remembers whether it was asked.
+function recordingFallback(verdict: Verifier.Verdict) {
+  let consulted = false
+  const impl: Verifier.Impl = { async verify() { consulted = true; return verdict } }
+  return { impl, called: () => consulted }
+}
+
+const resolver = (pairs: EquivalenceVerifier.Pair[] | null): EquivalenceVerifier.ReferenceResolver => ({
+  resolve: async () => pairs,
+})
+
+describe("EquivalenceVerifier", () => {
+  test("all models proven equivalent → OK at EQUIVALENCE strength, no fallback", async () => {
+    const fb = recordingFallback({ ok: true, checks: [] })
+    const impl = EquivalenceVerifier.create(
+      async () => ({ equivalent: true, confidence: 0.95 }),
+      resolver([{ model: "m1", baseSql: "a", headSql: "a" }]),
+      fb.impl,
+    )
+    const v = await impl.verify("/ws")
+    expect(v.decision).toBe(Decision.OK)
+    expect(v.strength).toBe(Strength.EQUIVALENCE)
+    expect(v.ok).toBe(true)
+    expect(fb.called()).toBe(false)
+  })
+
+  test("a proven material difference → PROVEN_DIFFERENT, no fallback (escalation-worthy)", async () => {
+    const fb = recordingFallback({ ok: true, checks: [] })
+    const impl = EquivalenceVerifier.create(
+      async () => ({ equivalent: false, differences: [{ severity: "semantic", description: "extra filter" }] }),
+      resolver([{ model: "m1", baseSql: "a", headSql: "b" }]),
+      fb.impl,
+    )
+    const v = await impl.verify("/ws")
+    expect(v.decision).toBe(Decision.PROVEN_DIFFERENT)
+    expect(v.ok).toBe(false)
+    expect(fb.called()).toBe(false)
+  })
+
+  test("undecidable equivalence + fallback PASSES → OK at BUILD strength (ok⟺OK invariant holds)", async () => {
+    const fb = recordingFallback({ ok: true, strength: Strength.BUILD, decision: Decision.OK, checks: [{ name: "dbt build", ok: true }] })
+    const impl = EquivalenceVerifier.create(
+      async () => ({ equivalent: false, validation_errors: ["unsupported: STRFTIME"] }),
+      resolver([{ model: "m1", baseSql: "a", headSql: "b" }]),
+      fb.impl,
+    )
+    const v = await impl.verify("/ws")
+    expect(fb.called()).toBe(true)
+    expect(v.ok).toBe(true)
+    expect(v.decision).toBe(Decision.OK) // accepted; NOT silently UNDECIDABLE (ok⟺OK)
+    expect(v.strength).toBe(Strength.BUILD) // the "equivalence couldn't decide" fact lives here
+    expect(v.reason).toContain("undecidable")
+  })
+
+  test("undecidable equivalence + fallback FAILS → FAILED (must escalate, not be swallowed)", async () => {
+    const fb = recordingFallback({ ok: false, strength: Strength.BUILD, decision: Decision.FAILED, checks: [{ name: "dbt build", ok: false }] })
+    const impl = EquivalenceVerifier.create(
+      async () => ({ equivalent: false, validation_errors: ["unsupported: STRFTIME"] }),
+      resolver([{ model: "m1", baseSql: "a", headSql: "b" }]),
+      fb.impl,
+    )
+    const v = await impl.verify("/ws")
+    expect(fb.called()).toBe(true)
+    expect(v.ok).toBe(false)
+    expect(v.decision).toBe(Decision.FAILED) // a real build failure must surface as FAILED so the router escalates
+  })
+
+  test("greenfield (no reference) → uses fallback verifier directly", async () => {
+    const fb = recordingFallback({ ok: true, strength: Strength.BUILD, decision: Decision.OK, checks: [] })
+    const impl = EquivalenceVerifier.create(async () => ({ equivalent: true }), resolver(null), fb.impl)
+    const v = await impl.verify("/ws")
+    expect(fb.called()).toBe(true)
+    expect(v.strength).toBe(Strength.BUILD)
+  })
+
+  test("equivalence engine throw on one model → undecidable (NOT 'different'), routes to fallback", async () => {
+    const fb = recordingFallback({ ok: true, strength: Strength.BUILD, decision: Decision.OK, checks: [] })
+    const impl = EquivalenceVerifier.create(
+      async () => { throw new Error("napi panic") },
+      resolver([{ model: "m1", baseSql: "a", headSql: "b" }]),
+      fb.impl,
+    )
+    const v = await impl.verify("/ws")
+    // engine error ⇒ undecidable (NOT PROVEN_DIFFERENT) ⇒ fallback consulted, decision from fallback
+    expect(fb.called()).toBe(true)
+    expect(v.decision).not.toBe(Decision.PROVEN_DIFFERENT)
+    expect(v.decision).toBe(Decision.OK) // fallback passed → accepted at BUILD strength
+    expect(v.strength).toBe(Strength.BUILD)
+  })
+
+  test("resolver throw → degrade to fallback (fail-open, honest)", async () => {
+    const fb = recordingFallback({ ok: true, unverifiable: true, strength: Strength.UNVERIFIABLE, decision: Decision.UNDECIDABLE, checks: [] })
+    const impl = EquivalenceVerifier.create(
+      async () => ({ equivalent: true }),
+      { resolve: async () => { throw new Error("git failed") } },
+      fb.impl,
+    )
+    const v = await impl.verify("/ws")
+    expect(fb.called()).toBe(true)
+    expect(v.strength).toBe(Strength.UNVERIFIABLE)
+  })
+})
diff --git a/packages/opencode/test/router/policy.e2e.test.ts b/packages/opencode/test/router/policy.e2e.test.ts
new file mode 100644
index 000000000..a72f5cefe
--- /dev/null
+++ b/packages/opencode/test/router/policy.e2e.test.ts
@@ -0,0 +1,109 @@
+import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test"
+import { Policy } from "../../src/router/policy"
+import { Router } from "../../src/router/router"
+
+// REAL network: a live local HTTP server (Bun.serve) plus the real api.altimate.ai host, which the tests below treat as unreachable.
+let server: ReturnType<typeof Bun.serve> // assigned in beforeAll, stopped in afterAll
+let base = "" // http://localhost:<port> of the local server, assigned in beforeAll
+let mode = "good" // selects which canned policy response the local server returns
+const outcomes: any[] = [] // JSON bodies received on paths ending in /outcomes (null when the body is not JSON)
+
+beforeAll(() => { // start the single local HTTP server shared by every test in this file
+  server = Bun.serve({
+    port: 0, // presumably lets the OS choose a free port; the real port is read back from server.port below
+    async fetch(req) {
+      const url = new URL(req.url)
+      if (url.pathname.endsWith("/outcomes")) { // outcome reports are recorded regardless of `mode`
+        outcomes.push(await req.json().catch(() => null))
+        return new Response("{}", { status: 200 })
+      }
+      switch (mode) { // every other path gets the canned policy response selected by `mode`
+        case "good":
+          return Response.json({ tiers: [{ model: "openrouter/acme/fast", label: "acme-fast" }, { model: "openrouter/acme/strong" }] })
+        case "500":
+          return new Response("upstream error", { status: 500 })
+        case "malformed":
+          return new Response("not json {{{", { status: 200 })
+        case "empty":
+          return Response.json({ tiers: [] })
+        case "garbage":
+          return Response.json({ tiers: [{ nope: 1 }, "str", null, { model: "" }, { model: 123 }] })
+        case "bomb":
+          return Response.json({ tiers: Array.from({ length: 1000 }, (_, i) => ({ model: `openrouter/x/m${i}` })) })
+        case "injection":
+          return Response.json({ tiers: [{ model: "openrouter/evil/m", label: "<script>alert(1)</script>" }] })
+        default:
+          return new Response("", { status: 404 })
+      }
+    },
+  })
+  base = `http://localhost:${server.port}`
+})
+afterAll(() => server?.stop(true)) // `?.` guards against beforeAll having failed before `server` was assigned
+afterEach(() => { // clear env vars a test may have set so they cannot leak into the next test
+  delete process.env["ALTIMATE_API_KEY"]
+  delete process.env["ALTIMATE_ROUTER_LADDER"]
+})
+
+const STATIC0 = Router.DEFAULT_LADDER[0].label // label of the first default-ladder tier; the tests use it to recognise the static fallback
+
+describe("Policy × REAL network (no mocks)", () => { // exercises Policy against the local server and the real api.altimate.ai host
+  test("resolve() is static with no key (no network)", () => {
+    expect(Policy.resolve().source).toBe("static")
+  })
+
+  test("good endpoint: fetches the customer ladder over real HTTP", async () => {
+    mode = "good"
+    const tiers = await Policy.altimate("k", base).tiers({ taskId: "t" })
+    expect(tiers[0].label).toBe("acme-fast")
+    expect(tiers[1].label).toBe("strong") // label derived from model
+  })
+
+  test("real UNREACHABLE endpoint (api.altimate.ai) → graceful fallback to static", async () => { // NOTE(review): outcome depends on the live host not serving a valid ladder — environment-dependent
+    const tiers = await Policy.altimate("k", "https://api.altimate.ai").tiers({})
+    expect(tiers[0].label).toBe(STATIC0)
+  }, 30_000)
+
+  test("reportOutcome posts to a real server when keyed; best-effort (no throw) when unreachable", async () => {
+    process.env["ALTIMATE_API_KEY"] = "k" // removed again by the file-level afterEach
+    await Policy.reportOutcome(
+      { schemaVersion: "1", solved: true, solvedBy: "glm-5.1", tier: 1, unverifiable: false, attempts: [], checks: [], evidenceHash: "djb2:0", createdAt: "t" }, // NOTE(review): fixture says schemaVersion "1" while verdict-strength.test.ts expects Verdict.SCHEMA_VERSION "2" — confirm intended
+      base,
+    )
+    expect(outcomes.at(-1)?.solvedBy).toBe("glm-5.1") // the local server captured the posted body
+    // unreachable host must not throw
+    await Policy.reportOutcome(
+      { schemaVersion: "1", solved: false, solvedBy: null, tier: null, unverifiable: false, attempts: [], checks: [], evidenceHash: "djb2:0", createdAt: "t" },
+      "https://api.altimate.ai",
+    )
+  }, 30_000)
+})
+
+describe("Policy × REAL network — ADVERSARIAL endpoint responses", () => { // each case switches `mode` so the local server returns a hostile or broken payload
+  const cases: [string, (t: Router.Tier[]) => void][] = [ // [server mode, assertion on the resolved tiers]
+    ["500", (t) => expect(t[0].label).toBe(STATIC0)],
+    ["malformed", (t) => expect(t[0].label).toBe(STATIC0)],
+    ["empty", (t) => expect(t[0].label).toBe(STATIC0)],
+    ["garbage", (t) => expect(t[0].label).toBe(STATIC0)], // no valid model → fallback
+  ]
+  for (const [m, assert] of cases) { // registers one test per table row
+    test(`'${m}' response → graceful fallback to static`, async () => {
+      mode = m
+      assert(await Policy.altimate("k", base).tiers({}))
+    })
+  }
+
+  test("'bomb' (1000-tier cost bomb) → capped to MAX_TIERS", async () => {
+    mode = "bomb"
+    const tiers = await Policy.altimate("k", base).tiers({})
+    expect(tiers.length).toBe(Policy.MAX_TIERS)
+  })
+
+  test("'injection' label → kept as inert string, does not crash; single tier", async () => {
+    mode = "injection"
+    const tiers = await Policy.altimate("k", base).tiers({})
+    expect(tiers).toHaveLength(1)
+    expect(tiers[0].model).toBe("openrouter/evil/m")
+    expect(typeof tiers[0].label).toBe("string") // only the type is pinned; the exact label text is not asserted
+  })
+})
diff --git a/packages/opencode/test/router/policy.test.ts b/packages/opencode/test/router/policy.test.ts
new file mode 100644
index 000000000..310409319
--- /dev/null
+++ b/packages/opencode/test/router/policy.test.ts
@@ -0,0 +1,117 @@
+import { afterEach, describe, expect, test } from "bun:test"
+import { Policy } from "../../src/router/policy"
+import { Router } from "../../src/router/router"
+import type { Verdict } from "../../src/router/verdict"
+
+afterEach(() => { // reset every env var these tests set so cases stay independent
+  delete process.env["ALTIMATE_API_KEY"]
+  delete process.env["ALTIMATE_API_URL"]
+  delete process.env["ALTIMATE_ROUTER_LADDER"]
+})
+
+function fakeFetch(handler: (url: string, init: any) => { ok: boolean; json: () => any }) { // recording stand-in for fetch; every call lands in `.calls`
+  const calls: { url: string; init: any }[] = []
+  async function stub(url: string, init: any) {
+    calls.push({ url, init }) // log the request before delegating to the handler
+    const reply = handler(url, init)
+    return { ok: reply.ok, json: async () => reply.json() } as any
+  }
+  return Object.assign(stub as unknown as typeof fetch, { calls })
+}
+
+describe("Policy.STATIC", () => { // the built-in policy: default ladder, or the ALTIMATE_ROUTER_LADDER override
+  test("returns the calibrated default ladder", async () => {
+    const tiers = await Policy.STATIC.tiers({})
+    expect(tiers[0].label).toBe("deepseek-v4-flash")
+    expect(Policy.STATIC.source).toBe("static")
+  })
+  test("honors the env ladder override", async () => {
+    process.env["ALTIMATE_ROUTER_LADDER"] = "openrouter/x/y" // cleared by the file-level afterEach
+    expect((await Policy.STATIC.tiers({}))[0].label).toBe("y") // label is the last path segment of the model id
+  })
+})
+
+describe("Policy.sanitizeTiers (defense against bad/compromised endpoint)", () => { // pins how untrusted tier lists from the policy endpoint are filtered
+  test("keeps valid tiers + derives missing labels", () => {
+    const t = Policy.sanitizeTiers([{ model: "p/a", label: "A" }, { model: "p/b" }])
+    expect(t).toEqual([{ model: "p/a", label: "A" }, { model: "p/b", label: "b" }])
+  })
+  test("filters entries without a usable string model", () => {
+    expect(Policy.sanitizeTiers([{ nope: 1 }, "str", null, { model: "" }, { model: 123 }, { model: "p/ok" }])).toEqual([
+      { model: "p/ok", label: "ok" },
+    ])
+  })
+  test("caps a cost-bomb ladder to MAX_TIERS", () => {
+    const big = Array.from({ length: 1000 }, (_, i) => ({ model: `p/m${i}` }))
+    expect(Policy.sanitizeTiers(big)!).toHaveLength(Policy.MAX_TIERS)
+  })
+  test("returns null for non-array / all-invalid (caller falls back to static)", () => {
+    expect(Policy.sanitizeTiers(null)).toBeNull()
+    expect(Policy.sanitizeTiers("nope")).toBeNull()
+    expect(Policy.sanitizeTiers([{ nope: 1 }])).toBeNull()
+  })
+  test("rejects malformed model ids (no slash, whitespace, control chars, over-long)", () => {
+    expect(Policy.sanitizeTiers([{ model: "noslash" }])).toBeNull()
+    expect(Policy.sanitizeTiers([{ model: "p/ a" }])).toBeNull()
+    expect(Policy.sanitizeTiers([{ model: "p/\x1b[31mx" }])).toBeNull() // ESC spelled as \x1b so the control byte survives editors and diff viewers
+    expect(Policy.sanitizeTiers([{ model: "p/" + "x".repeat(300) }])).toBeNull()
+  })
+  test("strips non-printable/ANSI from label (printed to terminal)", () => {
+    const t = Policy.sanitizeTiers([{ model: "p/evil", label: "ok\x1b[31mbad\x07" }]) // explicit ESC and BEL escapes: the input must differ from the expected output
+    expect(t![0].label).toBe("ok[31mbad") // ESC + BEL stripped, printable kept
+  })
+})
+
+describe("Policy.resolve", () => { // policy selection is driven by the ALTIMATE_API_KEY env var
+  test("static when no altimate key", () => {
+    expect(Policy.resolve().source).toBe("static")
+  })
+  test("altimate (customer) policy when key present", () => {
+    process.env["ALTIMATE_API_KEY"] = "sk-altimate-test" // cleared by the file-level afterEach
+    expect(Policy.resolve().source).toBe("altimate")
+  })
+})
+
+describe("Policy.altimate (customer policy)", () => { // each case passes a stub as the third argument in place of fetch
+  test("fetches the per-context ladder with auth", async () => {
+    const ff = fakeFetch(() => ({
+      ok: true,
+      json: () => ({ tiers: [{ model: "openrouter/acme/fast", label: "acme-fast" }] }),
+    }))
+    const p = Policy.altimate("sk-acme", "https://api.altimate.ai", ff)
+    const tiers = await p.tiers({ taskId: "t1", projectType: "dbt" })
+    expect(tiers[0].label).toBe("acme-fast")
+    expect(ff.calls[0].url).toContain("/v1/router/policy")
+    expect(ff.calls[0].init.headers.Authorization).toBe("Bearer sk-acme") // the key is sent as a bearer token
+  })
+  test("falls back to static ladder on non-ok response", async () => {
+    const ff = fakeFetch(() => ({ ok: false, json: () => ({}) }))
+    const tiers = await Policy.altimate("k", "https://api.altimate.ai", ff).tiers({})
+    expect(tiers[0].label).toBe(Router.DEFAULT_LADDER[0].label)
+  })
+  test("falls back to static ladder when transport throws", async () => {
+    const boom = (async () => { // fetch replacement that always rejects
+      throw new Error("network down")
+    }) as unknown as typeof fetch
+    const tiers = await Policy.altimate("k", "https://api.altimate.ai", boom).tiers({})
+    expect(tiers[0].label).toBe(Router.DEFAULT_LADDER[0].label)
+  })
+})
+
+describe("Policy.reportOutcome", () => { // outcome reporting is gated on ALTIMATE_API_KEY being set
+  const env: Verdict.Envelope = { // minimal envelope fixture shared by both cases
+    schemaVersion: "1", solved: true, solvedBy: "glm-5.1", tier: 1, unverifiable: false, attempts: [], checks: [], evidenceHash: "djb2:0", createdAt: "2026-05-31T00:00:00Z",
+  }
+  test("no-op without a key", async () => {
+    const ff = fakeFetch(() => ({ ok: true, json: () => ({}) }))
+    await Policy.reportOutcome(env, "https://api.altimate.ai", ff)
+    expect(ff.calls).toHaveLength(0) // the stub was never invoked
+  })
+  test("posts the verdict envelope when a key is set", async () => {
+    process.env["ALTIMATE_API_KEY"] = "sk-acme" // cleared by the file-level afterEach
+    const ff = fakeFetch(() => ({ ok: true, json: () => ({}) }))
+    await Policy.reportOutcome(env, "https://api.altimate.ai", ff)
+    expect(ff.calls[0].url).toContain("/v1/router/outcomes")
+    expect(JSON.parse(ff.calls[0].init.body).solvedBy).toBe("glm-5.1") // the request body is the JSON-serialised envelope
+  })
+})
diff --git a/packages/opencode/test/router/reference.test.ts b/packages/opencode/test/router/reference.test.ts
new file mode 100644
index 000000000..deeffe31d
--- /dev/null
+++ b/packages/opencode/test/router/reference.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, test } from "bun:test"
+import { ReferenceResolver } from "../../src/router/reference"
+
+const deps = (over: Partial<ReferenceResolver.Deps>): ReferenceResolver.Deps => ({ // default stubbed Deps; `over` replaces individual members
+  baseRef: async () => "main",
+  changedModels: async () => ["m1"],
+  compiledSql: async (_w, ref) => new Map([["m1", ref === "WORKING" ? "select 1 as a" : "select 1 as b"]]), // different SQL for the working tree ("WORKING") vs any other ref
+  schema: async () => ({ schema: true }),
+  ...over, // spread last so overrides win
+})
+
+describe("ReferenceResolver", () => { // resolve() outcomes for the stubbed Deps: null, empty list, or base/head pairs
+  test("no base ref → null (greenfield, caller uses build verifier)", async () => {
+    const r = ReferenceResolver.create(deps({ baseRef: async () => null }))
+    expect(await r.resolve("/ws")).toBeNull()
+  })
+
+  test("base exists but nothing changed → [] (nothing to verify)", async () => {
+    const r = ReferenceResolver.create(deps({ changedModels: async () => [] }))
+    expect(await r.resolve("/ws")).toEqual([]) // empty list, distinct from the null (no base) case above
+  })
+
+  test("changed model present on both sides → one pair with base/head compiled SQL + schema", async () => {
+    const r = ReferenceResolver.create(deps({}))
+    const pairs = await r.resolve("/ws")
+    expect(pairs).toHaveLength(1)
+    expect(pairs![0]).toMatchObject({ model: "m1", baseSql: "select 1 as b", headSql: "select 1 as a" }) // head comes from the "WORKING" compile, base from the other ref
+    expect(pairs![0].schema).toEqual({ schema: true })
+  })
+
+  test("model new on head (no base compiled) is skipped — not equivalence-checkable", async () => {
+    const r = ReferenceResolver.create(
+      deps({
+        changedModels: async () => ["m1", "m_new"],
+        compiledSql: async (_w, ref) =>
+          ref === "WORKING"
+            ? new Map([["m1", "select 1"], ["m_new", "select 2"]])
+            : new Map([["m1", "select 1 old"]]), // m_new absent at base
+      }),
+    )
+    const pairs = await r.resolve("/ws")
+    expect(pairs!.map((p) => p.model)).toEqual(["m1"]) // m_new dropped
+  })
+})
+
+describe("ReferenceResolver.gitDbtDeps (orchestration, mocked exec)", () => {
+  const mkExec = (calls: string[][], outputs: Record<string, { stdout: string; code: number }>) => // fake Exec: records each invocation in `calls`, answers from `outputs`
+    (async (cmd: string, args: string[]) => {
+      calls.push([cmd, ...args])
+      return outputs[`${cmd} ${args[0]}`] ?? { stdout: "", code: 0 } // keyed by "<cmd> <first arg>"; anything unlisted succeeds with empty output
+    }) as ReferenceResolver.Exec
+
+  const baseOpts = (over: Partial<ReferenceResolver.GitDbtOptions> = {}): ReferenceResolver.GitDbtOptions => ({ // stubbed options; `over` replaces individual members
+    readCompiled: async () => new Map([["m1", "select 1"]]),
+    buildSchema: async () => ({ schema: true }),
+    checkoutBase: async () => ({ dir: "/tmp/base", cleanup: async () => {} }),
+    ...over,
+  })
+
+  test("baseRef: HEAD present → sha; absent → null (greenfield)", async () => {
+    const d1 = ReferenceResolver.gitDbtDeps(mkExec([], { "git rev-parse": { stdout: "abc123\n", code: 0 } }), baseOpts())
+    expect(await d1.baseRef("/ws")).toBe("abc123") // trailing newline from stdout is not part of the result
+    const d2 = ReferenceResolver.gitDbtDeps(mkExec([], { "git rev-parse": { stdout: "", code: 128 } }), baseOpts())
+    expect(await d2.baseRef("/ws")).toBeNull()
+  })
+
+  test("changedModels: parses git diff to bare model names, filters non-.sql", async () => {
+    const d = ReferenceResolver.gitDbtDeps(
+      mkExec([], { "git diff": { stdout: "models/agg/m1.sql\nmodels/schema.yml\nmodels/dim/m2.sql\n", code: 0 } }),
+      baseOpts(),
+    )
+    expect(await d.changedModels("/ws", "HEAD")).toEqual(["m1", "m2"]) // directory and .sql extension stripped; schema.yml dropped
+  })
+
+  test("compiledSql WORKING → dbt compile in workdir then readCompiled", async () => {
+    const calls: string[][] = []
+    const d = ReferenceResolver.gitDbtDeps(mkExec(calls, {}), baseOpts())
+    const sql = await d.compiledSql("/ws", "WORKING")
+    expect(sql.get("m1")).toBe("select 1")
+    expect(calls.some((c) => c[0] === "dbt" && c[1] === "compile")).toBe(true) // a `dbt compile` invocation was recorded
+  })
+
+  test("compiledSql base → checkout, deps+compile in the checkout, cleanup always runs", async () => { // NOTE(review): only the success path is exercised; cleanup after a failing compile is not asserted here
+    let cleaned = false
+    const d = ReferenceResolver.gitDbtDeps(
+      mkExec([], {}),
+      baseOpts({ checkoutBase: async () => ({ dir: "/tmp/base", cleanup: async () => { cleaned = true } }) }),
+    )
+    await d.compiledSql("/ws", "abc123")
+    expect(cleaned).toBe(true)
+  })
+})
diff --git a/packages/opencode/test/router/router.e2e.test.ts b/packages/opencode/test/router/router.e2e.test.ts
new file mode 100644
index 000000000..c66b7c038
--- /dev/null
+++ b/packages/opencode/test/router/router.e2e.test.ts
@@ -0,0 +1,115 @@
+import { afterAll, beforeAll, describe, expect, test } from "bun:test"
+import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { Router } from "../../src/router/router"
+import { Verifier } from "../../src/router/verifier"
+import { Verdict } from "../../src/router/verdict"
+
+// REAL OpenRouter calls + REAL dbt. No mocks.
+const KEY = process.env["OPENROUTER_API_KEY"] || "" // empty string when unset; beforeAll rejects that
+const IMG = process.env["E2E_IMG"] || "" // provide a docker image with dbt-duckdb; no default
+const OR = "https://openrouter.ai/api/v1" // base URL for the chat-completions call in realRunAgent
+
+const dirs: string[] = [] // temp project directories created by project(); removed in afterAll
+function project(models: Record<string, string>): string { // scaffolds a throwaway dbt project (duckdb profile) and returns its directory
+  const root = mkdtempSync(join(tmpdir(), "e2e-router-"))
+  dirs.push(root) // registered right away so afterAll removes it even if a later write fails
+  writeFileSync(join(root, "dbt_project.yml"), `name: e2e\nprofile: e2e\nversion: "1.0"\nflags:\n  send_anonymous_usage_stats: false\nmodels:\n  e2e:\n    +materialized: table\n`)
+  writeFileSync(join(root, "profiles.yml"), `e2e:\n  target: dev\n  outputs:\n    dev:\n      type: duckdb\n      path: /proj/e2e.duckdb\n`)
+  mkdirSync(join(root, "models"))
+  Object.entries(models).forEach(([file, sql]) => writeFileSync(join(root, "models", file), sql)) // one file per entry: key is the file name, value its SQL
+  return root
+}
+
+async function realVerify(dir: string): Promise<Verifier.Verdict> { // runs Verifier.dbtVerifier on `dir`, executing each command inside the IMG docker image
+  return Verifier.dbtVerifier((cmd, workdir) => {
+    const p = Bun.spawnSync(
+      ["docker", "run", "--rm", "-v", `${workdir}:/proj`, "-w", "/proj", IMG, "bash", "-lc", `${cmd} --profiles-dir /proj 2>&1`], // workdir is mounted at /proj, which is also passed as the profiles dir
+      { stdout: "pipe", stderr: "pipe" },
+    )
+    return Promise.resolve({ output: (p.stdout?.toString() ?? "") + (p.stderr?.toString() ?? ""), exitCode: p.exitCode ?? 1 }) // a missing exit code is reported as 1
+  }).verify(dir)
+}
+
+function extractSql(s: string): string { // body of the first ``` / ```sql fenced block, or the whole text when there is no fence; trimmed either way
+  const [, inner] = s.match(/```(?:sql)?\s*([\s\S]*?)```/i) ?? []
+  return (inner ?? s).trim()
+}
+
+// Calls a real OpenRouter model and writes its SQL to models/answer.sql; throws on an HTTP error or an empty reply so a failed call is never mistaken for a solved tier.
+async function realRunAgent(model: string, note: string | undefined, dir: string, task: string, log: { model: string; note?: string }[]) {
+  log.push({ model, note }) // recorded first so tests can assert which tiers ran and with what context
+  const apiModel = model.replace(/^openrouter\//, "") // strip the ladder's "openrouter/" prefix before sending the id to the API
+  const res = await fetch(`${OR}/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", Authorization: `Bearer ${KEY}` },
+    body: JSON.stringify({
+      model: apiModel,
+      messages: [
+        { role: "system", content: "You are a dbt engineer. Output ONLY the SQL for the requested model in a ```sql code block. No prose, no schema.yml." },
+        { role: "user", content: task + (note ? `\n\nA PREVIOUS ATTEMPT FAILED VERIFICATION:\n${note}` : "") },
+      ],
+      max_tokens: 600, temperature: 0,
+    }),
+  })
+  if (!res.ok) throw new Error(`OpenRouter ${apiModel}: HTTP ${res.status}`) // e.g. bad key or rate limit — surface it instead of writing placeholder SQL
+  const content: unknown = ((await res.json()) as any)?.choices?.[0]?.message?.content
+  if (typeof content !== "string" || !content.trim()) throw new Error(`OpenRouter ${apiModel}: empty completion`)
+  writeFileSync(join(dir, "models", "answer.sql"), extractSql(content))
+}
+
+beforeAll(() => { // fail fast when an E2E prerequisite (API key, image name, locally available image) is missing
+  if (!KEY) throw new Error("OPENROUTER_API_KEY required for router E2E")
+  if (!IMG) throw new Error("E2E_IMG not set — provide a docker image with dbt-duckdb")
+  if (Bun.spawnSync(["docker", "image", "inspect", IMG], { stdout: "ignore", stderr: "ignore" }).exitCode !== 0) throw new Error(`image ${IMG} missing`)
+})
+afterAll(() => {
+  const quietly = (step: () => unknown) => { try { step() } catch {} } // best-effort: a failing step (e.g. no sudo binary) must not skip the next one
+  for (const d of dirs) { quietly(() => Bun.spawnSync(["sudo", "-n", "rm", "-rf", d])); quietly(() => rmSync(d, { recursive: true, force: true })) } // sudo -n fails instead of prompting for a password
+})
+
+describe("Router × REAL OpenRouter + REAL dbt (no mocks)", () => { // requires OPENROUTER_API_KEY and E2E_IMG (enforced in beforeAll)
+  test("solves at the cheap tier → no escalation (1 real call)", async () => {
+    const dir = project({})
+    const log: { model: string; note?: string }[] = []
+    const task = "Create a dbt model named `answer` that selects the integer 42 aliased as `value`. Materialized as a table."
+    const result = await Router.route({
+      tiers: [{ model: "openrouter/deepseek/deepseek-v4-flash", label: "deepseek-v4-flash" }],
+      runAgent: (m, note) => realRunAgent(m, note, dir, task, log),
+      verify: () => realVerify(dir),
+    })
+    expect(result.solved).toBe(true)
+    expect(result.solvedBy?.label).toBe("deepseek-v4-flash")
+    expect(log).toHaveLength(1) // only the cheap tier ran
+    // verdict envelope from a real run
+    const env = Verdict.build(result, { now: "2026-05-31T00:00:00Z" })
+    expect(env.solved).toBe(true)
+    expect(env.tier).toBe(0) // index of the solving tier within the ladder
+  }, 180_000)
+
+  test("ADVERSARIAL: unsatisfiable workspace → escalates through every real tier, caps, threads failure context", async () => {
+    // An unrelated, locked broken model makes verification fail no matter what the agent writes,
+    // forcing real escalation through both tiers. Tests real multi-model escalation + capping +
+    // that the exact failing node is handed to the next real model.
+    const dir = project({ "locked_broken.sql": "select notacolumn as x" })
+    const log: { model: string; note?: string }[] = []
+    const task = "Create a dbt model named `answer` selecting 1 as id."
+    const result = await Router.route({
+      tiers: [
+        { model: "openrouter/deepseek/deepseek-v4-flash", label: "deepseek-v4-flash" },
+        { model: "openrouter/z-ai/glm-5.1", label: "glm-5.1" },
+      ],
+      runAgent: (m, note) => realRunAgent(m, note, dir, task, log),
+      verify: () => realVerify(dir),
+    })
+    expect(result.solved).toBe(false) // genuinely unsolvable here
+    expect(result.attempts).toHaveLength(2) // escalated through BOTH real tiers
+    expect(log.map((l) => l.model)).toEqual([
+      "openrouter/deepseek/deepseek-v4-flash",
+      "openrouter/z-ai/glm-5.1",
+    ])
+    expect(log[0].note).toBeUndefined() // the first tier starts without failure context
+    expect(log[1].note ?? "").toMatch(/locked_broken|did not pass/i) // real failing-check context threaded
+  }, 240_000)
+})
diff --git a/packages/opencode/test/router/router.test.ts b/packages/opencode/test/router/router.test.ts
new file mode 100644
index 000000000..91779c3d5
--- /dev/null
+++ b/packages/opencode/test/router/router.test.ts
@@ -0,0 +1,111 @@
+import { afterEach, describe, expect, test } from "bun:test"
+import { Router } from "../../src/router/router"
+import { Verifier } from "../../src/router/verifier"
+
+const OK: Verifier.Verdict = { ok: true, checks: [{ name: "dbt build", ok: true }] } // canned passing verdict
+const FAIL: Verifier.Verdict = { // canned failing verdict naming a single failing check
+  ok: false,
+  reason: "1 dbt error(s) — failing: not_null_x",
+  checks: [{ name: "not_null_x", ok: false, detail: "Failure in test not_null_x" }],
+}
+
+afterEach(() => { // clear router env vars set by individual tests
+  delete process.env["ALTIMATE_ROUTER"]
+  delete process.env["ALTIMATE_ROUTER_LADDER"]
+})
+
+describe("Router config", () => { // env-driven configuration: the enable flag and the ladder override
+  test("enabled reads the flag", () => {
+    expect(Router.enabled()).toBe(false) // NOTE(review): assumes ALTIMATE_ROUTER is unset in the ambient environment — afterEach only clears it after a test has run
+    process.env["ALTIMATE_ROUTER"] = "1"
+    expect(Router.enabled()).toBe(true)
+  })
+  test("default ladder is cheap→strong", () => {
+    expect(Router.DEFAULT_LADDER[0].label).toBe("deepseek-v4-flash")
+    expect(Router.DEFAULT_LADDER.at(-1)!.label).toBe("claude-opus-4.8")
+  })
+  test("ladder() honors env override", () => {
+    process.env["ALTIMATE_ROUTER_LADDER"] = "openrouter/a/m1, openrouter/b/m2" // comma-separated, with a space after the comma
+    const l = Router.ladder()
+    expect(l.map((t) => t.label)).toEqual(["m1", "m2"]) // labels are the last path segment; the space is not part of the label
+  })
+})
+
+describe("Router.shouldEscalate", () => { // arguments: (verdict, index of the tier that just ran, ladder)
+  const tiers = Router.DEFAULT_LADDER
+  test("escalates on failure with tiers remaining", () => {
+    expect(Router.shouldEscalate(FAIL, 0, tiers)).toBe(true)
+  })
+  test("does not escalate on success", () => {
+    expect(Router.shouldEscalate(OK, 0, tiers)).toBe(false)
+  })
+  test("does not escalate past the top tier", () => {
+    expect(Router.shouldEscalate(FAIL, tiers.length - 1, tiers)).toBe(false) // last index: nothing left to escalate to
+  })
+})
+
+describe("Router.escalationContext", () => { // the text handed to the next tier after a failed attempt
+  test("names the failing checks + reason for the next tier", () => {
+    const ctx = Router.escalationContext({ model: "m", label: "deepseek-v4-flash" }, FAIL)
+    expect(ctx).toContain("deepseek-v4-flash") // label of the tier that failed
+    expect(ctx).toContain("not_null_x") // name of the failing check
+    expect(ctx).toContain("do not start over")
+  })
+})
+
+describe("Router.route", () => { // end-to-end ladder walk with stubbed runAgent/verify
+  test("stops at tier 0 when it passes (no escalation)", async () => {
+    const models: string[] = []
+    const r = await Router.route({
+      tiers: Router.DEFAULT_LADDER,
+      runAgent: async (m) => void models.push(m), // record which model ids were run
+      verify: async () => OK,
+    })
+    expect(r.solved).toBe(true)
+    expect(r.solvedBy!.label).toBe("deepseek-v4-flash")
+    expect(models).toHaveLength(1) // only the cheap tier ran
+  })
+
+  test("escalates through tiers until one passes, threading failure context", async () => {
+    const calls: { model: string; note?: string }[] = []
+    let n = 0 // counts verify() invocations
+    const r = await Router.route({
+      tiers: Router.DEFAULT_LADDER,
+      runAgent: async (model, note) => void calls.push({ model, note }),
+      verify: async () => (++n >= 2 ? OK : FAIL), // tier0 fails, tier1 passes
+    })
+    expect(r.solved).toBe(true)
+    expect(r.solvedBy!.label).toBe("glm-5.1")
+    expect(calls).toHaveLength(2)
+    expect(calls[0].note).toBeUndefined()
+    expect(calls[1].note).toContain("not_null_x") // tier1 got the failure context
+  })
+
+  test("a thrown runAgent error becomes a failed attempt and escalates (does not abort)", async () => {
+    const calls: string[] = []
+    let n = 0 // counts runAgent() invocations
+    const r = await Router.route({
+      tiers: Router.DEFAULT_LADDER,
+      runAgent: async (m) => {
+        calls.push(m)
+        if (++n === 1) throw new Error("model API down") // tier 0 throws
+      },
+      verify: async () => OK, // tier 1 verifies ok
+    })
+    expect(r.solved).toBe(true)
+    expect(r.solvedBy!.label).toBe("glm-5.1") // escalated past the throwing tier
+    expect(calls).toHaveLength(2)
+    expect(r.attempts[0].verdict.ok).toBe(false)
+    expect(r.attempts[0].verdict.reason).toContain("tier error")
+  })
+
+  test("unsolved when every tier fails (records all attempts)", async () => {
+    const r = await Router.route({
+      tiers: Router.DEFAULT_LADDER,
+      runAgent: async () => {},
+      verify: async () => FAIL,
+    })
+    expect(r.solved).toBe(false)
+    expect(r.attempts).toHaveLength(Router.DEFAULT_LADDER.length) // one attempt per tier
+  })
+})
diff --git a/packages/opencode/test/router/verdict-strength.test.ts b/packages/opencode/test/router/verdict-strength.test.ts
new file mode 100644
index 000000000..c54977855
--- /dev/null
+++ b/packages/opencode/test/router/verdict-strength.test.ts
@@ -0,0 +1,115 @@
+import { describe, expect, test } from "bun:test"
+import { Verifier } from "../../src/router/verifier"
+import { Router } from "../../src/router/router"
+import { Verdict } from "../../src/router/verdict"
+
+const { Strength, Decision } = Verifier // shorthand for the two Verifier members used throughout this file
+const PASS = "Done. PASS=5 WARN=0 ERROR=0 SKIP=0 TOTAL=5" // sample dbt summary line with no errors
+const FAIL = "Failure in test not_null_x (models/schema.yml)\nDone. PASS=4 WARN=0 ERROR=1 SKIP=0 TOTAL=5" // sample dbt output: one failing test plus a summary with ERROR=1
+
+describe("fromDbt sets strength + decision", () => { // Verifier.fromDbt(output, exitCode) → verdict
+  test("clean build → BUILD / OK", () => {
+    const v = Verifier.fromDbt(PASS, 0)
+    expect(v.ok).toBe(true)
+    expect(v.strength).toBe(Strength.BUILD)
+    expect(v.decision).toBe(Decision.OK)
+  })
+  test("failed build → BUILD / FAILED", () => {
+    const v = Verifier.fromDbt(FAIL, 1)
+    expect(v.ok).toBe(false)
+    expect(v.strength).toBe(Strength.BUILD)
+    expect(v.decision).toBe(Decision.FAILED)
+  })
+  // Regression: non-zero exit with a CLEAN summary (e.g. dbt killed/OOM mid-run) must
+  // still be FAILED with a reason set — the `else if (exitCode !== 0)` branch IS reachable.
+  test("non-zero exit + clean summary → FAILED with reason (not a silent pass)", () => {
+    const v = Verifier.fromDbt("Done. PASS=5 WARN=0 ERROR=0 SKIP=0 TOTAL=5", 5)
+    expect(v.ok).toBe(false)
+    expect(v.decision).toBe(Decision.FAILED)
+    expect(v.reason).toContain("exited 5") // the exit code appears in the reason text
+  })
+})
+
+describe("fromEquivalence folds per-model results soundly", () => { // many per-model equivalence results → one verdict
+  test("all equivalent → OK at EQUIVALENCE strength", () => {
+    const v = Verifier.fromEquivalence([
+      { model: "a", result: { equivalent: true, confidence: 0.95 } },
+      { model: "b", result: { equivalent: true, confidence: 0.9 } },
+    ])
+    expect(v.ok).toBe(true)
+    expect(v.decision).toBe(Decision.OK)
+    expect(v.strength).toBe(Strength.EQUIVALENCE)
+    expect(v.confidence).toBe(0.9) // min across models
+  })
+
+  test("a material difference → PROVEN_DIFFERENT, not ok", () => {
+    const v = Verifier.fromEquivalence([
+      { model: "a", result: { equivalent: true } },
+      { model: "b", result: { equivalent: false, differences: [{ description: "extra row" }] } },
+    ])
+    expect(v.ok).toBe(false)
+    expect(v.decision).toBe(Decision.PROVEN_DIFFERENT)
+    expect(v.strength).toBe(Strength.EQUIVALENCE)
+    expect(v.reason).toContain("b") // the differing model is named in the reason
+  })
+
+  test("validation errors → UNDECIDABLE (NOT different), drops to BUILD strength", () => {
+    const v = Verifier.fromEquivalence([
+      { model: "a", result: { equivalent: false, validation_errors: ["unsupported: QUALIFY"] } },
+    ])
+    expect(v.ok).toBe(false)
+    expect(v.decision).toBe(Decision.UNDECIDABLE)
+    expect(v.strength).toBe(Strength.BUILD)
+  })
+
+  test("proven-different outranks undecidable", () => {
+    const v = Verifier.fromEquivalence([
+      { model: "a", result: { equivalent: false, validation_errors: ["undecidable"] } },
+      { model: "b", result: { equivalent: false, differences: [{ severity: "Semantic" }] } },
+    ])
+    expect(v.decision).toBe(Decision.PROVEN_DIFFERENT)
+  })
+
+  test("no reference resolved → UNDECIDABLE / UNVERIFIABLE (never silent pass)", () => {
+    const v = Verifier.fromEquivalence([]) // empty input: nothing was compared
+    expect(v.ok).toBe(false)
+    expect(v.decision).toBe(Decision.UNDECIDABLE)
+    expect(v.strength).toBe(Strength.UNVERIFIABLE)
+  })
+})
+
+describe("Router.shouldEscalate is decision-aware", () => {
+  const tiers: Router.Tier[] = [{ model: "m1", label: "m1" }, { model: "m2", label: "m2" }] // two-tier ladder: index 0 can escalate, index 1 is the top
+  const mk = (decision: Verifier.Decision): Verifier.Verdict => ({ ok: decision === Decision.OK, decision, checks: [] }) // minimal verdict carrying the given decision
+
+  test("FAILED escalates", () => expect(Router.shouldEscalate(mk(Decision.FAILED), 0, tiers)).toBe(true))
+  test("PROVEN_DIFFERENT escalates", () => expect(Router.shouldEscalate(mk(Decision.PROVEN_DIFFERENT), 0, tiers)).toBe(true))
+  test("UNDECIDABLE does NOT escalate (fallback, not stronger model)", () =>
+    expect(Router.shouldEscalate(mk(Decision.UNDECIDABLE), 0, tiers)).toBe(false))
+  test("OK does NOT escalate", () => expect(Router.shouldEscalate(mk(Decision.OK), 0, tiers)).toBe(false))
+  test("never escalates past the last tier", () =>
+    expect(Router.shouldEscalate(mk(Decision.FAILED), 1, tiers)).toBe(false))
+  test("legacy verdict without decision falls back to !ok", () => {
+    expect(Router.shouldEscalate({ ok: false, checks: [] }, 0, tiers)).toBe(true)
+    expect(Router.shouldEscalate({ ok: true, checks: [] }, 0, tiers)).toBe(false)
+  })
+})
+
+describe("Verdict.Envelope carries strength + decision (v2)", () => {
+  test("schema version bumped to 2", () => expect(Verdict.SCHEMA_VERSION).toBe("2"))
+  test("envelope records the accepted result's strength + decision", () => {
+    const result: Router.RouteResult = { // single solved attempt whose verdict comes from fromEquivalence
+      solved: true,
+      solvedBy: { model: "m1", label: "m1" },
+      attempts: [
+        { tier: { model: "m1", label: "m1" }, verdict: Verifier.fromEquivalence([{ model: "x", result: { equivalent: true, confidence: 0.95 } }]) },
+      ],
+    }
+    const env = Verdict.build(result, { now: "2026-05-31T00:00:00Z" })
+    expect(env.schemaVersion).toBe("2")
+    expect(env.strength).toBe(Strength.EQUIVALENCE) // top-level fields
+    expect(env.decision).toBe(Decision.OK)
+    expect(env.attempts[0].strength).toBe(Strength.EQUIVALENCE) // and the same fields on the per-attempt record
+    expect(env.attempts[0].decision).toBe(Decision.OK)
+  })
+})
diff --git a/packages/opencode/test/router/verdict.test.ts b/packages/opencode/test/router/verdict.test.ts
new file mode 100644
index 000000000..cbab5d882
--- /dev/null
+++ b/packages/opencode/test/router/verdict.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, test } from "bun:test"
+import { Verdict } from "../../src/router/verdict"
+import type { Router } from "../../src/router/router"
+
+const NOW = "2026-05-31T05:00:00.000Z" // fixed timestamp passed as `now` to Verdict.build in the tests below
+
+const solvedResult: Router.RouteResult = { // fixture: first attempt fails, second attempt passes
+  solved: true,
+  solvedBy: { model: "openrouter/z-ai/glm-5.1", label: "glm-5.1" }, // same model/label as attempts[1].tier
+  attempts: [
+    { // attempt 0: not ok, with a single failing check
+      tier: { model: "openrouter/deepseek/deepseek-v4-flash", label: "deepseek-v4-flash" },
+      verdict: { ok: false, reason: "1 error — failing: not_null_x", checks: [{ name: "not_null_x", ok: false }] },
+    },
+    { // attempt 1: ok, carries an evidence string
+      tier: { model: "openrouter/z-ai/glm-5.1", label: "glm-5.1" },
+      verdict: { ok: true, checks: [{ name: "dbt build", ok: true }], evidence: "PASS=12 ERROR=0" },
+    },
+  ],
+}
+
+const unsolvedResult: Router.RouteResult = { // fixture: a single failed attempt and no solvedBy
+  solved: false,
+  attempts: [
+    {
+      tier: { model: "m", label: "deepseek-v4-flash" },
+      verdict: { ok: false, reason: "fail", checks: [{ name: "t1", ok: false }], evidence: "ERROR=2" },
+    },
+  ],
+}
+
+describe("Verdict.evidenceHash", () => {
+  test("deterministic + prefixed", () => {
+    expect(Verdict.evidenceHash("abc")).toBe(Verdict.evidenceHash("abc")) // same input -> same hash
+    expect(Verdict.evidenceHash("abc")).toMatch(/^djb2:[0-9a-f]{8}$/) // "djb2:" prefix + exactly 8 lowercase hex digits
+    expect(Verdict.evidenceHash("abc")).not.toBe(Verdict.evidenceHash("abd")) // a one-character change alters the hash
+  })
+})
+
+describe("Verdict.build", () => {
+  test("solved: records solving tier, index, and per-attempt history", () => {
+    const envelope = Verdict.build(solvedResult, { now: NOW })
+    expect(envelope.solved).toBe(true)
+    expect(envelope.solvedBy).toBe("glm-5.1") // the solving tier's label
+    expect(envelope.tier).toBe(1) // zero-based index of the solving attempt
+    expect(envelope.attempts).toHaveLength(2)
+    expect(envelope.attempts[0]).toMatchObject({ model: "deepseek-v4-flash", ok: false, failing: ["not_null_x"] })
+    expect(envelope.checks[0].ok).toBe(true)
+    expect(envelope.createdAt).toBe(NOW)
+    expect(envelope.signature).toBeUndefined() // no signer was supplied
+  })
+
+  test("unsolved: solvedBy null, tier null", () => {
+    const envelope = Verdict.build(unsolvedResult, { now: NOW })
+    expect(envelope.solved).toBe(false)
+    expect(envelope.solvedBy).toBeNull()
+    expect(envelope.tier).toBeNull()
+    expect(envelope.attempts[0].failing).toEqual(["t1"])
+  })
+
+  test("applies an injected signer", () => {
+    const signed = Verdict.build(solvedResult, { now: NOW, sign: (unsigned) => `sig-${unsigned.evidenceHash}` })
+    expect(signed.signature).toContain("sig-djb2:")
+  })
+
+  test("serialize round-trips", () => {
+    const built = Verdict.build(solvedResult, { now: NOW })
+    expect(JSON.parse(Verdict.serialize(built)).solvedBy).toBe("glm-5.1")
+  })
+})
diff --git a/packages/opencode/test/router/verifier.e2e.test.ts b/packages/opencode/test/router/verifier.e2e.test.ts
new file mode 100644
index 000000000..3a042b176
--- /dev/null
+++ b/packages/opencode/test/router/verifier.e2e.test.ts
@@ -0,0 +1,97 @@
+import { afterAll, beforeAll, describe, expect, test } from "bun:test"
+import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs"
+import { tmpdir } from "node:os"
+import { join } from "node:path"
+import { Verifier } from "../../src/router/verifier"
+
+// REAL dbt — no mocks. Runs `dbt build` inside a docker image that has dbt-duckdb.
+// Provide the image via E2E_IMG (no default — opt-in, infra-dependent test).
+const IMG = process.env["E2E_IMG"] || "" // empty string when unset; beforeAll throws in that case
+
+/** Real command runner: shells `cmd` (e.g. `dbt build`) inside the image with `workdir` mounted at /proj. Declared async so a spawn failure rejects the returned promise instead of throwing synchronously. */
+async function dockerDbtRun(cmd: string, workdir: string): Promise<Verifier.RunResult> {
+  const p = Bun.spawnSync(
+    ["docker", "run", "--rm", "-v", `${workdir}:/proj`, "-w", "/proj", IMG, "bash", "-lc", `${cmd} --profiles-dir /proj 2>&1`],
+    { stdout: "pipe", stderr: "pipe" },
+  )
+  const output = (p.stdout?.toString() ?? "") + (p.stderr?.toString() ?? "") // in-container stderr is already folded into stdout by `2>&1`; the docker client's own stderr is appended after it
+  return { output, exitCode: p.exitCode ?? 1 } // a missing exit code is reported as failure
+}
+
+const dirs: string[] = [] // every scratch project created by project(); removed in afterAll
+function project(models: Record<string, string>, schema?: string): string { // writes a minimal dbt-duckdb project to a fresh temp dir and returns its path
+  const root = mkdtempSync(join(tmpdir(), "e2e-dbt-"))
+  dirs.push(root)
+  writeFileSync(join(root, "dbt_project.yml"), `name: e2e\nprofile: e2e\nversion: "1.0"\nflags:\n  send_anonymous_usage_stats: false\nmodels:\n  e2e:\n    +materialized: table\n`)
+  writeFileSync(join(root, "profiles.yml"), `e2e:\n  target: dev\n  outputs:\n    dev:\n      type: duckdb\n      path: /proj/e2e.duckdb\n`)
+  mkdirSync(join(root, "models"))
+  Object.entries(models).forEach(([fileName, sql]) => writeFileSync(join(root, "models", fileName), sql))
+  if (schema) writeFileSync(join(root, "models", "schema.yml"), schema)
+  return root
+}
+
+beforeAll(() => { // preconditions: E2E_IMG must be set and the image must already exist locally
+  if (IMG === "") throw new Error("E2E_IMG not set — provide a docker image with dbt-duckdb")
+  const { exitCode } = Bun.spawnSync(["docker", "image", "inspect", IMG], { stdout: "ignore", stderr: "ignore" })
+  if (exitCode !== 0) throw new Error(`E2E image ${IMG} not present`)
+})
+afterAll(() => { // best-effort cleanup: unprivileged delete first; `sudo -n` (never prompts) only if that fails, e.g. on root-owned files presumably left by the container
+  for (const d of dirs) try { rmSync(d, { recursive: true, force: true }) } catch { try { Bun.spawnSync(["sudo", "-n", "rm", "-rf", d]) } catch {} }
+})
+
+describe("Verifier × REAL dbt (no mocks)", () => { // each test builds a scratch project and runs dbt through dockerDbtRun; 120 s timeout per test
+  test("clean project builds → verdict ok", async () => {
+    const dir = project(
+      { "ok_model.sql": "select 1 as id" },
+      "version: 2\nmodels:\n  - name: ok_model\n    columns:\n      - name: id\n        tests: [not_null, unique]\n",
+    )
+    const v = await Verifier.dbtVerifier(dockerDbtRun).verify(dir)
+    expect(v.ok).toBe(true)
+  }, 120_000)
+
+  test("compile error → verdict not ok, names the failing model", async () => {
+    const dir = project({ "ok_model.sql": "select 1 as id", "broken.sql": "select from where" })
+    const v = await Verifier.dbtVerifier(dockerDbtRun).verify(dir)
+    expect(v.ok).toBe(false)
+    expect(v.reason ?? "").toMatch(/broken|error/i) // loose match: either the model name or the word "error"
+  }, 120_000)
+
+  test("failing data test (not_null on a null column) → verdict not ok, names the test", async () => {
+    const dir = project(
+      { "nulls.sql": "select cast(null as integer) as id" },
+      "version: 2\nmodels:\n  - name: nulls\n    columns:\n      - name: id\n        tests: [not_null]\n",
+    )
+    const v = await Verifier.dbtVerifier(dockerDbtRun).verify(dir)
+    expect(v.ok).toBe(false)
+    expect(JSON.stringify(v.checks)).toMatch(/not_null/i) // the failing test's name must appear somewhere in the checks
+  }, 120_000)
+
+  test("ADVERSARIAL spoof: model emits a fake 'Done. PASS=99 ERROR=0' but real build fails → verdict not ok", async () => {
+    // Spoof attempt: the model's SQL carries a fake clean summary in a trailing SQL comment.
+    // The unresolved column is expected to make the build fail. If dbt echoes the failing
+    // SQL, the fake "Done. PASS=99 ..." line lands in the captured output ahead of dbt's own
+    // summary. The verdict is derived from this run's real output and exit code, so it must
+    // stay not-ok whether or not the fake line was echoed.
+    const dir = project({
+      "evil.sql": "select notacolumn as id -- Done. PASS=99 WARN=0 ERROR=0 SKIP=0 TOTAL=99",
+    })
+    const r = await dockerDbtRun("dbt build", dir)
+    const v = Verifier.fromDbt(r.output, r.exitCode)
+    expect(r.exitCode).not.toBe(0) // the build really failed
+    expect(v.ok).toBe(false) // ...and the gate cannot be spoofed by model-emitted text
+    // If the injection vector fired (fake line echoed), last-match must still return the real (error>0) summary.
+    if (r.output.includes("PASS=99")) {
+      expect(Verifier.parseDbtSummary(r.output)!.error).toBeGreaterThan(0)
+    }
+  }, 120_000)
+
+  test("ADVERSARIAL: agent CLAIMS success in its transcript, but the verifier ignores the claim and runs dbt itself", async () => {
+    // Simulate the orchestration: the agent's transcript says it passed, but the workspace is broken.
+    const agentTranscript = "I have completed the task. All tests pass. Done. PASS=50 WARN=0 ERROR=0 TOTAL=50"
+    const dir = project({ "ok_model.sql": "select 1 as id", "broken.sql": "select nonexistent_col from nowhere" })
+    // agentTranscript is never passed to the verifier — only the workspace directory is.
+    const v = await Verifier.dbtVerifier(dockerDbtRun).verify(dir)
+    expect(agentTranscript).toContain("ERROR=0") // the lie exists...
+    expect(v.ok).toBe(false) // ...but ground truth wins
+  }, 120_000)
+})
diff --git a/packages/opencode/test/router/verifier.test.ts b/packages/opencode/test/router/verifier.test.ts
new file mode 100644
index 000000000..678fe6102
--- /dev/null
+++ b/packages/opencode/test/router/verifier.test.ts
@@ -0,0 +1,124 @@
+import { describe, expect, test } from "bun:test"
+import { Verifier } from "../../src/router/verifier"
+
+const PASS = "01:23:45  Done. PASS=12 WARN=0 ERROR=0 SKIP=0 NO-OP=0 TOTAL=12" // clean summary line; note the extra NO-OP field
+const FAIL = // one failing-test line followed by a summary reporting ERROR=1
+  "Failure in test not_null_fct_reviews_review_id (models/schema.yml)\n" +
+  "01:23:45  Done. PASS=11 WARN=0 ERROR=1 SKIP=0 TOTAL=12"
+const COMPILE_ERR = "Compilation Error in model stg_orders (models/stg_orders.sql)\n  unexpected token" // compile failure with no summary line
+
+describe("Verifier.parseDbtSummary", () => {
+  test("parses a clean summary", () => {
+    expect(Verifier.parseDbtSummary(PASS)).toEqual({ pass: 12, warn: 0, error: 0, skip: 0, total: 12 }) // the NO-OP field in PASS is not part of the expected result
+  })
+  test("parses summary with errors", () => {
+    expect(Verifier.parseDbtSummary(FAIL)).toEqual({ pass: 11, warn: 0, error: 1, skip: 0, total: 12 })
+  })
+  test("returns null when no summary present", () => {
+    expect(Verifier.parseDbtSummary("nothing here")).toBeNull() // null, not undefined
+  })
+})
+
+describe("Verifier.failingNodes", () => {
+  test("extracts a failing test", () => {
+    const failing = Verifier.failingNodes(FAIL)
+    expect(failing).toHaveLength(1)
+    expect(failing[0].name).toBe("not_null_fct_reviews_review_id")
+    expect(failing[0].ok).toBe(false)
+  })
+  test("extracts a compilation error model", () => {
+    const [first] = Verifier.failingNodes(COMPILE_ERR)
+    expect(first.name).toBe("stg_orders")
+  })
+  test("dedups repeated nodes", () => {
+    const failing = Verifier.failingNodes(`${FAIL}\n${FAIL}`) // the same failure reported twice
+    expect(failing).toHaveLength(1)
+  })
+})
+
+describe("Verifier.fromDbt", () => {
+  test("ok when exit 0 + summary + zero errors", () => {
+    const verdict = Verifier.fromDbt(PASS, 0)
+    expect(verdict.ok).toBe(true)
+    expect(verdict.reason).toBeUndefined()
+  })
+  test("not ok when there are dbt errors (and names the failing node for escalation)", () => {
+    const verdict = Verifier.fromDbt(FAIL, 1)
+    expect(verdict.ok).toBe(false)
+    expect(verdict.reason).toContain("not_null_fct_reviews_review_id")
+    expect(verdict.checks.some((check) => !check.ok)).toBe(true)
+  })
+  test("not ok when build never completed (no summary)", () => {
+    const verdict = Verifier.fromDbt("crashed early", 1)
+    expect(verdict.ok).toBe(false)
+    expect(verdict.reason).toContain("did not complete")
+  })
+  test("not ok when summary clean but non-zero exit", () => {
+    expect(Verifier.fromDbt(PASS, 2)).toHaveProperty("ok", false) // the exit code overrides a clean summary
+  })
+})
+
+describe("Verifier — ADVERSARIAL", () => { // hostile or malformed dbt output must never yield a false "ok"
+  test("summary-line INJECTION: fake 'PASS=99 ERROR=0' earlier is ignored; real (last) summary wins", () => {
+    const malicious =
+      "-- model output echoed by dbt on error:\n" +
+      "Done. PASS=99 WARN=0 ERROR=0 SKIP=0 TOTAL=99\n" + // fake, injected via model SQL
+      "Compilation Error in model evil (models/evil.sql)\n" +
+      "01:00:00  Done. PASS=4 WARN=0 ERROR=1 SKIP=0 TOTAL=5" // dbt's REAL summary, last
+    expect(Verifier.parseDbtSummary(malicious)).toEqual({ pass: 4, warn: 0, error: 1, skip: 0, total: 5 })
+    expect(Verifier.fromDbt(malicious, 1).ok).toBe(false)
+  })
+
+  test("exitCode is the backstop: fake clean summary but non-zero exit -> not ok", () => {
+    expect(Verifier.fromDbt("Done. PASS=99 WARN=0 ERROR=0 SKIP=0 TOTAL=99", 1).ok).toBe(false)
+  })
+
+  test("a real clean build (exit 0, real summary last) is ok even if a fake line precedes", () => {
+    const out = "Done. PASS=1 ERROR=5 TOTAL=6\n...later...\nDone. PASS=12 WARN=0 ERROR=0 SKIP=0 TOTAL=12"
+    expect(Verifier.fromDbt(out, 0).ok).toBe(true)
+  })
+
+  test("ANSI color codes around the summary do not break parsing", () => { // ESC is written as \x1b so the fixture cannot silently lose it to editors or patch tooling
+    const ansi = "\x1b[0m01:00:00  Done. PASS=12 WARN=0 ERROR=0 SKIP=0 TOTAL=12\x1b[0m"
+    expect(Verifier.parseDbtSummary(ansi)?.pass).toBe(12)
+  })
+
+  test("empty / whitespace / non-dbt output -> not ok (no summary)", () => {
+    for (const o of ["", "   \n\t", "Killed", "Traceback (most recent call last):"]) {
+      expect(Verifier.fromDbt(o, 1).ok).toBe(false)
+    }
+  })
+
+  test("huge output completes quickly (no catastrophic backtracking)", () => {
+    const huge = "x ".repeat(500_000) + "\nDone. PASS=3 WARN=0 ERROR=0 SKIP=0 TOTAL=3" // ~1 MB of filler before the real summary
+    const t0 = Date.now()
+    expect(Verifier.fromDbt(huge, 0).ok).toBe(true)
+    expect(Date.now() - t0).toBeLessThan(2000) // generous wall-clock bound in milliseconds
+  })
+
+  test("multiple real summaries (incremental run + test run) -> last one is authoritative", () => {
+    const multi = "Done. PASS=8 ERROR=0 TOTAL=8\n...tests...\nDone. PASS=10 WARN=0 ERROR=2 SKIP=0 TOTAL=12"
+    expect(Verifier.fromDbt(multi, 1).ok).toBe(false)
+    expect(Verifier.parseDbtSummary(multi)?.error).toBe(2)
+  })
+})
+
+describe("Verifier impls", () => {
+  test("ALLOW_ALL passes", async () => {
+    const verdict = await Verifier.ALLOW_ALL.verify("/app")
+    expect(verdict.ok).toBe(true)
+  })
+  test("dbtVerifier judges via injected runner", async () => {
+    const passing = Verifier.dbtVerifier(() => Promise.resolve({ output: PASS, exitCode: 0 }))
+    const failing = Verifier.dbtVerifier(() => Promise.resolve({ output: FAIL, exitCode: 1 }))
+    expect((await passing.verify("/app")).ok).toBe(true)
+    expect((await failing.verify("/app")).ok).toBe(false)
+  })
+  test("dbtVerifier fails open if the runner throws", async () => {
+    // The runner rejects; the verdict is still expected to be ok, with the error noted in `reason`.
+    const throwing = Verifier.dbtVerifier(() => Promise.reject(new Error("dbt missing")))
+    const verdict = await throwing.verify("/app")
+    expect(verdict.ok).toBe(true)
+    expect(verdict.reason).toContain("verifier error")
+  })
+})