From 20073199eba264d772d9af92708d208f2aa74169 Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Fri, 12 Jun 2026 11:36:52 -0700
Subject: [PATCH 1/6] Add canonicalizeFragment utility for fragment shadow-gate
 comparison

---
 .../atlas-canonicalize-fragment.test.ts       | 135 ++++++++++++++++++
 src/atlas/canonicalize.ts                     |  72 ++++++++++
 2 files changed, 207 insertions(+)
 create mode 100644 src/__tests__/atlas-canonicalize-fragment.test.ts

diff --git a/src/__tests__/atlas-canonicalize-fragment.test.ts b/src/__tests__/atlas-canonicalize-fragment.test.ts
new file mode 100644
index 0000000..8238aea
--- /dev/null
+++ b/src/__tests__/atlas-canonicalize-fragment.test.ts
@@ -0,0 +1,135 @@
+import { describe, it, expect } from "vitest";
+import { canonicalizeFragment } from "../atlas/canonicalize.js";
+
+// Tests for the §6.2 `canonicalizeFragment` JSON-stringify normalizer used by
+// the Phase-2 dual-run shadow comparator. The function is named
+// `canonicalizeFragment` (not `canonicalize`) inside src/atlas/canonicalize.ts
+// because that module already exports the Tier-3 ranker `canonicalize`
+// (different signature, different role). See SLOT-1 of the implementation plan.
+
+describe("canonicalizeFragment — §6.2 normalizer", () => {
+  // ── T1: recursive key-sort stability ────────────────────────────────────────
+  // Two objects whose keys differ only by INSERTION ORDER must canonicalize to
+  // byte-identical JSON.stringify output. The dual-run comparator relies on
+  // `JSON.stringify(canonicalizeFragment(a)) === JSON.stringify(canonicalizeFragment(b))`
+  // (spec §6.2), so the canonical output must be stable regardless of which
+  // order the model emitted the keys.
+  it("T1: emits stable JSON-stringified output across key-permuted inputs", () => {
+    // Two CandidateFragment-shaped objects whose top-level AND nested keys are
+    // permuted between the two literals. Same field values; different order.
+    const a = {
+      sourcetype: "github" as const,
+      subsystem: "atlas",
+      title: "Schema enforcement at the leaf boundary",
+      content: "Hello world.",
+      claimSlugHint: "schema-enforcement",
+      evidence: [
+        { kind: "url", ref: "https://example.com/a" },
+        { kind: "url", ref: "https://example.com/b" },
+      ],
+      provenance: {
+        date: "2026-06-12",
+        classification: {
+          knowledge_type: "behavior",
+          provenance_class: "primary",
+          validation_status: "showcase-verified",
+          confidence: "high",
+        },
+      },
+    };
+
+    const b = {
+      // Top-level keys permuted.
+      title: "Schema enforcement at the leaf boundary",
+      provenance: {
+        // Nested keys permuted.
+        classification: {
+          confidence: "high",
+          validation_status: "showcase-verified",
+          provenance_class: "primary",
+          knowledge_type: "behavior",
+        },
+        date: "2026-06-12",
+      },
+      // evidence kept in the SAME element order (arrays are positional).
+      evidence: [
+        { ref: "https://example.com/a", kind: "url" },
+        { ref: "https://example.com/b", kind: "url" },
+      ],
+      claimSlugHint: "schema-enforcement",
+      content: "Hello world.",
+      subsystem: "atlas",
+      sourcetype: "github" as const,
+    };
+
+    const canonA = JSON.stringify(canonicalizeFragment(a));
+    const canonB = JSON.stringify(canonicalizeFragment(b));
+    expect(canonA).toBe(canonB);
+    // Also assert deep-equal as a redundant structural check.
+    expect(canonicalizeFragment(a)).toEqual(canonicalizeFragment(b));
+  });
+
+  // ── T2: whitespace normalization + numeric round-trip + array order ────────
+  // §6.2(b): strings trim + collapse internal whitespace runs (including
+  // newlines and tabs) to a single space — explicitly lossy on free-text fields.
+  // §6.2(c): numeric round-trip so `1.0 ≡ 1`.
+  // §6.2(d): arrays are NOT sorted — element order is load-bearing for
+  // `evidence[]` and must be preserved positionally.
+  it("T2: normalizes whitespace lossily, round-trips numerics, preserves array order", () => {
+    // Whitespace: leading + trailing trim, internal runs (spaces + newlines +
+    // tabs) collapse to ONE space.
+    const out = canonicalizeFragment({
+      content: "  hello\n\n  world  ",
+      title: "foo  bar",
+      provenance: {
+        // Nested string also normalized.
+        note: "alpha\n\tbeta",
+      },
+    }) as {
+      content: string;
+      title: string;
+      provenance: { note: string };
+    };
+    expect(out.content).toBe("hello world");
+    expect(out.title).toBe("foo bar");
+    expect(out.provenance.note).toBe("alpha beta");
+
+    // Whitespace-only string collapses to empty (trim removes everything).
+    const ws = canonicalizeFragment({ s: "   \n\t  " }) as { s: string };
+    expect(ws.s).toBe("");
+
+    // Numeric canonicalization: 1.0 and 1 must compare equal after canonicalize.
+    const n1 = JSON.stringify(canonicalizeFragment({ x: 1.0 }));
+    const n2 = JSON.stringify(canonicalizeFragment({ x: 1 }));
+    expect(n1).toBe(n2);
+    // Non-integer numerics also round-trip stably.
+    const n3 = JSON.stringify(canonicalizeFragment({ x: 1.5 }));
+    const n4 = JSON.stringify(canonicalizeFragment({ x: 1.5 }));
+    expect(n3).toBe(n4);
+
+    // Array order PRESERVED — ["a","b"] must NOT canonicalize equal to ["b","a"].
+    const ab = JSON.stringify(canonicalizeFragment({ arr: ["a", "b"] }));
+    const ba = JSON.stringify(canonicalizeFragment({ arr: ["b", "a"] }));
+    expect(ab).not.toBe(ba);
+
+    // Same array order DOES canonicalize equal, even with nested objects whose
+    // keys are permuted.
+    const ev1 = JSON.stringify(
+      canonicalizeFragment({
+        evidence: [
+          { kind: "url", ref: "x" },
+          { kind: "url", ref: "y" },
+        ],
+      }),
+    );
+    const ev2 = JSON.stringify(
+      canonicalizeFragment({
+        evidence: [
+          { ref: "x", kind: "url" },
+          { ref: "y", kind: "url" },
+        ],
+      }),
+    );
+    expect(ev1).toBe(ev2);
+  });
+});
diff --git a/src/atlas/canonicalize.ts b/src/atlas/canonicalize.ts
index ef55cce..6e0e870 100644
--- a/src/atlas/canonicalize.ts
+++ b/src/atlas/canonicalize.ts
@@ -302,3 +302,75 @@ export function canonicalize(fragments: CandidateFragment[]): Candidate[] {
   });
   return candidates;
 }
+
+// ── canonicalizeFragment ──────────────────────────────────────────────────────
+//
+// Per-fragment structural normalizer for the Phase-2 dual-run shadow comparator
+// (spec §6.2). Two fragments are considered "the same" iff
+// `JSON.stringify(canonicalizeFragment(a)) === JSON.stringify(canonicalizeFragment(b))`,
+// so the output must be stable under input variations that the comparator
+// does NOT care about: object-key insertion order, whitespace inside free-text
+// fields, and `1.0` vs `1` numeric encoding from the model.
+//
+// Distinct from the Tier-3 `canonicalize(fragments[])` ranker above — that one
+// dedups+ranks an ARRAY of CandidateFragments, this one structurally normalizes
+// ONE fragment-shaped object for byte-equality comparison. The two names
+// intentionally diverge from the spec's `canonicalize(fragment)` so they can
+// coexist in this module without shadowing each other; T1, T2 and T10 import
+// `canonicalizeFragment` by name (see SLOT-1 of the impl plan, spec footer).
+//
+// Behavior (§6.2):
+//   (a) Recursive object-key sort — keys at every depth emit in sorted order.
+//   (b) String-field whitespace normalization — trim leading/trailing
+//       whitespace, then collapse every internal run of whitespace + newlines
+//       (\s+) to a single space. This is **intentionally LOSSY** on free-text
+//       fields: multi-line `content` formatting and intentional double-spaces
+//       are flattened. Accepted because byte-identity on free-text from two
+//       independent LLM draws is infeasible even with a fixed seed; what we
+//       actually want to compare is structure + content-modulo-whitespace.
+//   (c) Numeric canonicalization — finite numbers pass through unary `+`
+//       (`+value`). `1.0`, `1` and `1.00` are already the same in-memory
+//       number once parsed, so they JSON.stringify identically. Non-finite
+//       values (NaN, ±Infinity) pass through unchanged (JSON.stringify
+//       emits them as `null`, which is the same null on both sides).
+//   (d) Array order PRESERVED — arrays are NOT sorted. `evidence[]` and
+//       `validationTargets[]` element order is load-bearing and must compare
+//       positionally. Array ELEMENTS are recursively normalized in place.
+//
+// Pure: never mutates the input.
+export function canonicalizeFragment(fragment: object): object {
+  return canonicalizeValue(fragment) as object;
+}
+
+function canonicalizeValue(value: unknown): unknown {
+  if (typeof value === "string") {
+    // §6.2(b): trim, then collapse every internal whitespace run to ONE space.
+    // \s (and trim) cover all Unicode whitespace and line terminators.
+    return value.trim().replace(/\s+/g, " ");
+  }
+  if (typeof value === "number") {
+    // §6.2(c): `1.0` and `1` are already the same double once parsed, so the
+    // unary plus is an identity kept for spec parity. NaN / ±Infinity pass
+    // through unchanged; JSON.stringify emits `null` for them on both sides.
+    return Number.isFinite(value) ? +value : value;
+  }
+  if (Array.isArray(value)) {
+    // §6.2(d): preserve element order; recurse into each element.
+    return value.map((element) => canonicalizeValue(element));
+  }
+  if (value !== null && typeof value === "object") {
+    // §6.2(a): rebuild the object with keys inserted in sorted (UTF-16 code
+    // unit) order so JSON.stringify output is stable across key-permuted
+    // inputs. Object.fromEntries defines OWN data properties, so a key named
+    // "__proto__" (legal in JSON.parse output) is kept as data instead of
+    // being routed through the prototype setter and silently dropped.
+    const entries = Object.entries(value as Record<string, unknown>);
+    entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
+    const canonical = entries.map(
+      ([key, child]): [string, unknown] => [key, canonicalizeValue(child)],
+    );
+    return Object.fromEntries(canonical);
+  }
+  // booleans, null, undefined, bigint, symbol → unchanged.
+  return value;
+}

From c923c1497e7b0930dc5e4ed0c2b0d773d1a303ec Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Fri, 12 Jun 2026 11:36:52 -0700
Subject: [PATCH 2/6] Add EpisodicCandidateFragmentSchema with
 sensitivity-floor transform and 4 invariants

---
 src/__tests__/atlas-episodic-schema.test.ts | 149 ++++++++++++++++++++
 src/atlas/types.ts                          |  51 +++++++
 2 files changed, 200 insertions(+)
 create mode 100644 src/__tests__/atlas-episodic-schema.test.ts

diff --git a/src/__tests__/atlas-episodic-schema.test.ts b/src/__tests__/atlas-episodic-schema.test.ts
new file mode 100644
index 0000000..2dbd707
--- /dev/null
+++ b/src/__tests__/atlas-episodic-schema.test.ts
@@ -0,0 +1,149 @@
+import { describe, it, expect } from "vitest";
+import { EpisodicCandidateFragmentSchema } from "../atlas/types.js";
+
+// ── T6 — EpisodicCandidateFragmentSchema invariants (spec §4.6 / §7.3) ────────
+//
+// EpisodicCandidateFragmentSchema narrows CandidateFragmentSchema with five
+// episodic-leaf invariants:
+//   - needsReview === true                       (refine, reject)
+//   - provenance_class === "derived"             (refine, reject)
+//   - confidence === "low"                       (refine, reject)
+//   - validation_status === "unverified"         (refine, reject)
+//   - sensitivity floor "internal" (transform: "public" coerced up; stronger
+//     values preserved verbatim — NOT a reject-below rule)
+//
+// These tests prove (i) the sensitivity transform coerces the four input
+// sensitivities correctly, and (ii) each of the four predicate invariants
+// rejects with an error path/message that names the violated field.
+
+// Base fixture: a structurally-valid episodic fragment with every episodic
+// invariant satisfied. Per-test variants clone this and mutate ONE field so the
+// failure cause is unambiguous.
+const baseEpisodic = () => ({
+  sourcetype: "episodic" as const,
+  subsystem: "agent-orchestration",
+  source_name: "session-2026-06-12",
+  title:
+    "Blitz manifest decomposition is the orchestrator's job, not the executor's",
+  content:
+    "When the user invokes a blitz, the orchestrator (not a sub-agent) decomposes the plan into Depends-annotated slot tasks. Executors receive a single pre-computed slot and never see the manifest.",
+  provenance: {
+    source: "episodic-session",
+    classification: {
+      sensitivity: "internal" as const,
+      knowledge_type: "process" as const,
+      audience: "all-staff",
+      validation_status: "unverified" as const,
+      confidence: "low" as const,
+      provenance_class: "derived" as const,
+      freshness: { as_of: "2026-06-12" },
+    },
+  },
+  evidence: [],
+  needsReview: true,
+  validationTargets: [],
+});
+
+describe("EpisodicCandidateFragmentSchema — sensitivity-floor transform", () => {
+  it("coerces sensitivity=public up to internal", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.sensitivity = "public" as "internal";
+    const parsed = EpisodicCandidateFragmentSchema.parse(input);
+    expect(parsed.provenance.classification.sensitivity).toBe("internal");
+  });
+
+  it("preserves sensitivity=internal verbatim", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.sensitivity = "internal";
+    const parsed = EpisodicCandidateFragmentSchema.parse(input);
+    expect(parsed.provenance.classification.sensitivity).toBe("internal");
+  });
+
+  it("preserves sensitivity=proprietary verbatim (stronger than floor)", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.sensitivity = "proprietary" as "internal";
+    const parsed = EpisodicCandidateFragmentSchema.parse(input);
+    expect(parsed.provenance.classification.sensitivity).toBe("proprietary");
+  });
+
+  it("preserves sensitivity=secret verbatim (strongest)", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.sensitivity = "secret" as "internal";
+    const parsed = EpisodicCandidateFragmentSchema.parse(input);
+    expect(parsed.provenance.classification.sensitivity).toBe("secret");
+  });
+
+  it("does not mutate the caller's input when coercing sensitivity to floor", () => {
+    // Regression: a `.transform` that writes through `f.provenance.classification.sensitivity = ...`
+    // would mutate the caller's input. Zod actually rebuilds the object graph on parse,
+    // so the transform is non-mutating in practice. This test pins that empirical guarantee.
+    // If a future maintainer "optimizes" the transform to in-place mutation (or Zod's
+    // semantics change), this test catches it before the regression ships.
+    const input = baseEpisodic();
+    input.provenance.classification.sensitivity = "public" as "internal";
+    const snapshot = structuredClone(input);
+    const parsed = EpisodicCandidateFragmentSchema.parse(input);
+    expect(input).toEqual(snapshot); // input not mutated
+    expect(input.provenance.classification.sensitivity).toBe("public");
+    expect(parsed.provenance.classification.sensitivity).toBe("internal"); // coerced on output
+  });
+});
+
+describe("EpisodicCandidateFragmentSchema — predicate-refinement rejections", () => {
+  it("rejects needsReview=false (episodic must be needsReview=true)", () => {
+    const input = baseEpisodic();
+    input.needsReview = false;
+    const result = EpisodicCandidateFragmentSchema.safeParse(input);
+    expect(result.success).toBe(false);
+    if (!result.success) {
+      const issue = result.error.issues.find((i) =>
+        i.path.includes("needsReview"),
+      );
+      expect(issue).toBeDefined();
+      expect(issue!.message).toMatch(/needsReview/);
+    }
+  });
+
+  it("rejects provenance_class=primary (episodic must be derived)", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.provenance_class = "primary" as "derived";
+    const result = EpisodicCandidateFragmentSchema.safeParse(input);
+    expect(result.success).toBe(false);
+    if (!result.success) {
+      const issue = result.error.issues.find((i) =>
+        i.path.includes("provenance_class"),
+      );
+      expect(issue).toBeDefined();
+      expect(issue!.message).toMatch(/provenance_class/);
+    }
+  });
+
+  it("rejects confidence=high (episodic must be confidence=low)", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.confidence = "high" as "low";
+    const result = EpisodicCandidateFragmentSchema.safeParse(input);
+    expect(result.success).toBe(false);
+    if (!result.success) {
+      const issue = result.error.issues.find((i) =>
+        i.path.includes("confidence"),
+      );
+      expect(issue).toBeDefined();
+      expect(issue!.message).toMatch(/confidence/);
+    }
+  });
+
+  it("rejects validation_status=source-verified (episodic must be unverified)", () => {
+    const input = baseEpisodic();
+    input.provenance.classification.validation_status =
+      "source-verified" as "unverified";
+    const result = EpisodicCandidateFragmentSchema.safeParse(input);
+    expect(result.success).toBe(false);
+    if (!result.success) {
+      const issue = result.error.issues.find((i) =>
+        i.path.includes("validation_status"),
+      );
+      expect(issue).toBeDefined();
+      expect(issue!.message).toMatch(/validation_status/);
+    }
+  });
+});
diff --git a/src/atlas/types.ts b/src/atlas/types.ts
index b0b3ca6..722f8d8 100644
--- a/src/atlas/types.ts
+++ b/src/atlas/types.ts
@@ -147,6 +147,57 @@ export const CandidateFragmentSchema = CandidateFragmentObject.refine(
   SUBSYSTEM_NO_DELIMITER_ISSUE,
 );
 
+// ── EpisodicCandidateFragmentSchema (spec §4.6) ───────────────────────────────
+//
+// A wrapping variant of `CandidateFragmentSchema` that enforces the five
+// episodic-leaf invariants. Four are predicate refinements that REJECT on
+// violation; the fifth (sensitivity) is a `.transform()` that COERCES UP to
+// the `"internal"` floor — `"public"` is silently rewritten to `"internal"`,
+// while `"proprietary"` / `"secret"` are preserved verbatim. This is the
+// "coerce up to floor" rule, NOT "reject below floor".
+//
+// The CLI helper detects `sourcetype === "episodic"` after the base parse and
+// runs this narrowed schema as a SECOND parse. The on-disk fragment carries
+// the coerced sensitivity value.
+//
+// If you add a `.regex(...)`, `.refine(...)`, or `.transform(...)` here, update
+// the §4.1.1 refinement-audit test (T9) — JSON Schema conversion silently
+// drops these and they must be wired into the post-pass.
+export const EpisodicCandidateFragmentSchema = CandidateFragmentSchema.refine(
+  (f) => f.needsReview === true,
+  {
+    message: "episodic fragment requires needsReview=true",
+    path: ["needsReview"],
+  },
+)
+  .refine((f) => f.provenance.classification.provenance_class === "derived", {
+    message: "episodic requires provenance_class=derived",
+    path: ["provenance", "classification", "provenance_class"],
+  })
+  .refine((f) => f.provenance.classification.confidence === "low", {
+    message: "episodic requires confidence=low (clamped)",
+    path: ["provenance", "classification", "confidence"],
+  })
+  .refine(
+    (f) => f.provenance.classification.validation_status === "unverified",
+    {
+      message: "episodic requires validation_status=unverified",
+      path: ["provenance", "classification", "validation_status"],
+    },
+  )
+  // Sensitivity-floor transform: coerce up to "internal" floor (NOT reject-below).
+  // "public" → "internal"; "internal" / "proprietary" / "secret" preserved verbatim.
+  .transform((f) => {
+    if (f.provenance.classification.sensitivity === "public") {
+      f.provenance.classification.sensitivity = "internal";
+    }
+    return f;
+  });
+
+export type EpisodicCandidateFragment = z.infer<
+  typeof EpisodicCandidateFragmentSchema
+>;
+
 // ── Candidate (Tier-3 finalized row, 1:1 with an atlas_seed_entries row) ───────
 
 export const CandidateSchema = CandidateFragmentObject.extend({

From 6825bdc34db839c19f2eb0cdbc1a479b88aa0227 Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Fri, 12 Jun 2026 11:36:52 -0700
Subject: [PATCH 3/6] Derive JSON Schema from CandidateFragment via
 zod-to-json-schema

---
 package-lock.json                       |  19 ++-
 package.json                            |   5 +-
 src/__tests__/atlas-json-schema.test.ts | 188 ++++++++++++++++++++++++
 src/atlas/json-schema.ts                |  90 ++++++++++++
 4 files changed, 293 insertions(+), 9 deletions(-)
 create mode 100644 src/__tests__/atlas-json-schema.test.ts
 create mode 100644 src/atlas/json-schema.ts

diff --git a/package-lock.json b/package-lock.json
index 76f5089..4a9978d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -28,7 +28,8 @@
         "pgvector": "^0.2.0",
         "simple-git": "^3.27.0",
         "yaml": "^2.8.3",
-        "zod": "^3.23.8"
+        "zod": "^3.23.8",
+        "zod-to-json-schema": "^3.25.2"
       },
       "bin": {
         "atlas": "dist/atlas-cli.js",
@@ -44,6 +45,8 @@
         "@types/jsdom": "^28.0.1",
         "@types/node": "^25.0.6",
         "@types/pg": "^8.11.10",
+        "ajv": "^8.20.0",
+        "ajv-formats": "^3.0.1",
         "jsdom": "^28.0.0",
         "tsx": "^4.21.0",
         "typescript": "^5.9.3",
@@ -1829,9 +1832,9 @@
       }
     },
     "node_modules/ajv": {
-      "version": "8.18.0",
-      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz",
-      "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==",
+      "version": "8.20.0",
+      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz",
+      "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==",
       "license": "MIT",
       "dependencies": {
         "fast-deep-equal": "^3.1.3",
@@ -5931,12 +5934,12 @@
       }
     },
     "node_modules/zod-to-json-schema": {
-      "version": "3.25.1",
-      "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz",
-      "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==",
+      "version": "3.25.2",
+      "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.2.tgz",
+      "integrity": "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==",
       "license": "ISC",
       "peerDependencies": {
-        "zod": "^3.25 || ^4"
+        "zod": "^3.25.28 || ^4"
       }
     }
   }
diff --git a/package.json b/package.json
index c5f3438..f856913 100644
--- a/package.json
+++ b/package.json
@@ -72,7 +72,8 @@
     "pgvector": "^0.2.0",
     "simple-git": "^3.27.0",
     "yaml": "^2.8.3",
-    "zod": "^3.23.8"
+    "zod": "^3.23.8",
+    "zod-to-json-schema": "^3.25.2"
   },
   "peerDependencies": {
     "@xenova/transformers": "^2.17.0",
@@ -100,6 +101,8 @@
     "@types/jsdom": "^28.0.1",
     "@types/node": "^25.0.6",
     "@types/pg": "^8.11.10",
+    "ajv": "^8.20.0",
+    "ajv-formats": "^3.0.1",
     "jsdom": "^28.0.0",
     "tsx": "^4.21.0",
     "typescript": "^5.9.3",
diff --git a/src/__tests__/atlas-json-schema.test.ts b/src/__tests__/atlas-json-schema.test.ts
new file mode 100644
index 0000000..82d1f78
--- /dev/null
+++ b/src/__tests__/atlas-json-schema.test.ts
@@ -0,0 +1,188 @@
+// T3 — Atlas JSON Schema derivation tests (spec §4.1, §7.1, §7.3).
+//
+// Verifies the family-picker + the two derived JSON Schema documents in
+// `src/atlas/json-schema.ts`:
+//
+//   1. Each derived schema has the expected top-level structure (`object`
+//      type at the schema root, regardless of whether zod-to-json-schema
+//      wraps it in `definitions`).
+//   2. `jsonSchemaForFamily("episodic")` returns the episodic schema by
+//      reference; every other `SourceType` returns the base schema.
+//   3. Conformance test — a known-good fragment validates against the
+//      derived JSON Schema via ajv. This guards against converter
+//      regressions that make the schema REJECT a valid fragment; it does
+//      not detect a dropped constraint (a looser schema still accepts the
+//      known-good fragment), which would need a negative fixture.
+//
+// ajv is a JSON Schema validator (Draft-07 / 2019-09 / 2020-12); the
+// zod-to-json-schema output targets Draft-07 by default, which ajv@8
+// supports natively. ajv-formats wires the standard format keywords
+// (`uri`, `date-time`, etc.) even though we don't currently use them on
+// the fragment — added defensively so a future `.regex(...)` / format
+// constraint added to types.ts surfaces immediately.
+
+import { describe, it, expect } from "vitest";
+// ajv + ajv-formats ship CJS default exports; under our ESM `"type": "module"`
+// + `verbatimModuleSyntax` config, the runtime value lives on `.default` while
+// the type still resolves through the namespace import. Pull both off `.default`.
+import * as ajvNs from "ajv";
+import * as ajvFormatsNs from "ajv-formats";
+const Ajv = (ajvNs as unknown as { default: typeof import("ajv").default })
+  .default;
+const addFormats = (
+  ajvFormatsNs as unknown as { default: typeof import("ajv-formats").default }
+).default;
+
+import {
+  CANDIDATE_FRAGMENT_JSON_SCHEMA,
+  EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA,
+  jsonSchemaForFamily,
+} from "../atlas/json-schema.js";
+import {
+  CandidateFragmentSchema,
+  type CandidateFragment,
+} from "../atlas/types.js";
+
+// A structurally-valid CandidateFragment matching the §9.3 contract. Mirrors
+// the cross-source-subsystem fixture shape (notion-doc fragment from
+// fixtures/atlas/aggregate/cross-source-subsystem.json) but inlined so the
+// test does not depend on fixture-file path stability.
+const KNOWN_GOOD_FRAGMENT: CandidateFragment = {
+  sourcetype: "notion-doc",
+  subsystem: "agui-protocol",
+  claimSlugHint: "interrupt-resume-keying",
+  source_name: "notion-doc",
+  repo_url: "https://github.com/ag-ui-protocol/ag-ui",
+  ref: "interrupts-adr",
+  title: "Interrupt resume links via interruptId, NOT parentRunId",
+  content:
+    "The Interrupts design decided a resume is linked to its interrupt via interruptId rather than parentRunId.",
+  provenance: {
+    source: "notion-doc",
+    url: "https://www.notion.so/copilotkit/Interrupts-Proposal-Design-Decisions-Reasoning",
+    date: "2026-04-18",
+    classification: {
+      sensitivity: "internal",
+      knowledge_type: "design-rationale",
+      audience: "engineering",
+      validation_status: "source-verified",
+      confidence: "high",
+      provenance_class: "primary",
+      freshness: { as_of: "2026-04-18", re_verify_by: "2026-09-18" },
+    },
+  },
+  evidence: [
+    {
+      kind: "thread",
+      body: "Interrupts Proposal — Design Decisions & Reasoning",
+    },
+  ],
+  needsReview: false,
+  validationTargets: [],
+};
+
+// Sanity check — the inline fragment matches the Zod contract. If this fails,
+// the conformance assertion below would test a different schema than the one
+// the rest of the harvest pipeline parses (silent test rot).
+describe("KNOWN_GOOD_FRAGMENT sanity", () => {
+  it("parses against the Zod CandidateFragmentSchema", () => {
+    expect(() =>
+      CandidateFragmentSchema.parse(KNOWN_GOOD_FRAGMENT),
+    ).not.toThrow();
+  });
+});
+
+// `zod-to-json-schema` with `name: "..."` wraps the derived schema in a
+// top-level `{ $ref: "#/definitions/<name>", definitions: { <name>: {...} } }`
+// container. With `$refStrategy: "none"` the SUB-schemas are inlined, but
+// the OUTER wrapper still exists. Both ajv and a manual structural check
+// need to drill through `definitions[name]` to reach the actual schema body.
+function rootSchemaBody(
+  schema: Record<string, unknown>,
+  name: string,
+): Record<string, unknown> {
+  const definitions = schema.definitions as
+    | Record<string, Record<string, unknown> | undefined>
+    | undefined;
+  // Unwrap `definitions[name]` when present; otherwise assume a future
+  // zod-to-json-schema version stopped wrapping and return the root as-is.
+  return definitions?.[name] || schema;
+}
+
+describe("CANDIDATE_FRAGMENT_JSON_SCHEMA shape", () => {
+  it("derives an object schema with the expected required keys", () => {
+    const schema = CANDIDATE_FRAGMENT_JSON_SCHEMA as Record<string, unknown>;
+    const body = rootSchemaBody(schema, "CandidateFragment");
+    expect(body.type).toBe("object");
+    // Top-level required-shape keys per spec §9.3 (mirrors
+    // CandidateFragmentObject.shape in types.ts).
+    const properties = body.properties as Record<string, unknown>;
+    expect(properties).toBeDefined();
+    for (const key of [
+      "sourcetype",
+      "subsystem",
+      "source_name",
+      "title",
+      "content",
+      "provenance",
+      "evidence",
+      "needsReview",
+      "validationTargets",
+    ]) {
+      expect(Object.keys(properties)).toContain(key);
+    }
+  });
+
+  it("validates a known-good fragment via ajv", () => {
+    const ajv = new Ajv({ strict: false, allErrors: true });
+    addFormats(ajv);
+    const validate = ajv.compile(CANDIDATE_FRAGMENT_JSON_SCHEMA);
+    const ok = validate(KNOWN_GOOD_FRAGMENT);
+    if (!ok) {
+      // Surface ajv errors so a converter regression is debuggable.
+      throw new Error(
+        `ajv rejected a known-good fragment: ${JSON.stringify(validate.errors, null, 2)}`,
+      );
+    }
+    expect(ok).toBe(true);
+  });
+});
+
+describe("EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA shape", () => {
+  it("derives an object schema (sub-shape inherited from base)", () => {
+    const schema = EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA as Record<
+      string,
+      unknown
+    >;
+    const body = rootSchemaBody(schema, "EpisodicCandidateFragment");
+    expect(body.type).toBe("object");
+    const properties = body.properties as Record<string, unknown>;
+    expect(properties).toBeDefined();
+    expect(Object.keys(properties)).toContain("sourcetype");
+    expect(Object.keys(properties)).toContain("needsReview");
+  });
+});
+
+describe("jsonSchemaForFamily", () => {
+  it("returns the episodic schema for family=episodic", () => {
+    expect(jsonSchemaForFamily("episodic")).toBe(
+      EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA,
+    );
+  });
+
+  it("returns the base schema for non-episodic families", () => {
+    // Cover several non-episodic SourceType values to guard against a future
+    // `if/else` ladder that special-cases more than just episodic.
+    for (const family of [
+      "memory",
+      "github-pr",
+      "github-issue",
+      "notion-doc",
+      "linear-doc",
+      "agent-doc",
+      "derived",
+    ] as const) {
+      expect(jsonSchemaForFamily(family)).toBe(CANDIDATE_FRAGMENT_JSON_SCHEMA);
+    }
+  });
+});
diff --git a/src/atlas/json-schema.ts b/src/atlas/json-schema.ts
new file mode 100644
index 0000000..77aaf5d
--- /dev/null
+++ b/src/atlas/json-schema.ts
@@ -0,0 +1,90 @@
+// Atlas JSON Schema derivation (spec §4.1).
+//
+// The orchestration shell that fans out atlas harvest leaves passes a JSON
+// Schema document to the harness `agent(prompt, {schema})` call so the model
+// emits a structurally-valid CandidateFragment by construction (Route-B of
+// spec §4). The atlas package owns the DERIVATION of that schema — a single
+// source of truth wired to the Zod contract in `./types.ts` — so the shell
+// can boot, call `jsonSchemaForFamily(family)`, and hand the result to the
+// harness without re-implementing schema conversion.
+//
+// Per spec §4.1.1, Zod `.refine(...)` / `.transform(...)` constraints are
+// silently dropped by `zod-to-json-schema` (they are runtime predicates, not
+// structural). Two callouts:
+//   1. The `subsystemHasNoDelimiter` refinement on `CandidateFragmentSchema`
+//      is dropped here; the post-pass Zod parse in
+//      `atlas harvest write-fragment --stdin` still rejects.
+//   2. The four episodic predicate refinements on
+//      `EpisodicCandidateFragmentSchema` (needsReview, provenance_class,
+//      confidence, validation_status) and the sensitivity-floor
+//      `.transform()` are runtime-only and are silently dropped by
+//      `zod-to-json-schema`; they are re-applied by the post-pass
+//      `EpisodicCandidateFragmentSchema.parse(...)` in
+//      `atlas harvest write-fragment --stdin`. The derived JSON Schema
+//      therefore enforces only the base structural contract — the episodic
+//      clamps live in the Zod post-pass.
+
+import { zodToJsonSchema } from "zod-to-json-schema";
+
+import {
+  CandidateFragmentSchema,
+  EpisodicCandidateFragmentSchema,
+  type CandidateFragment,
+} from "./types.js";
+
+// `sourcetype` is an inline enum on `CandidateFragmentObject` and is not
+// re-exported as a named symbol from `./types.js`. Derive it from the
+// inferred `CandidateFragment` type so this file stays in lock-step with the
+// Zod contract: new enum members are accepted by `jsonSchemaForFamily`
+// automatically and fall through to the base schema unless special-cased.
+export type SourceType = CandidateFragment["sourcetype"];
+
+// Base CandidateFragment JSON Schema (spec §4.1).
+//
+// `$refStrategy: "none"` inlines every nested sub-schema, so the only `$ref`
+// left is the root pointer to the `CandidateFragment` definition that `name`
+// adds. The harness consumes this schema directly; inlining keeps the wire
+// payload self-describing and avoids `$defs` resolution ordering issues.
+export const CANDIDATE_FRAGMENT_JSON_SCHEMA: object = zodToJsonSchema(
+  CandidateFragmentSchema,
+  { name: "CandidateFragment", $refStrategy: "none" },
+);
+
+// Episodic-narrowed CandidateFragment JSON Schema (spec §4.6).
+//
+// `EpisodicCandidateFragmentSchema` adds four predicate refinements
+// (needsReview=true, provenance_class=derived, confidence=low,
+// validation_status=unverified) and one `.transform()` (sensitivity floor).
+// `zod-to-json-schema` drops ALL of these because they are runtime-only
+// (refine/transform never round-trip into JSON Schema). The shell-side
+// schema therefore expresses only the base structural shape; the four
+// predicate clamps and the sensitivity-floor transform are re-applied by
+// the post-pass `EpisodicCandidateFragmentSchema.parse(...)` in
+// `atlas harvest write-fragment --stdin` (spec §4.2.1, step 3).
+export const EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA: object = zodToJsonSchema(
+  EpisodicCandidateFragmentSchema,
+  { name: "EpisodicCandidateFragment", $refStrategy: "none" },
+);
+
+/**
+ * Family-picker for the harness `agent(prompt, {schema})` call.
+ *
+ * Returns the JSON Schema document the orchestration shell should hand to
+ * the harness for a given leaf family:
+ *   - `"episodic"` → {@link EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA}
+ *   - any other `SourceType` value → {@link CANDIDATE_FRAGMENT_JSON_SCHEMA}
+ *
+ * This is the canonical entrypoint for the shell — the shell never imports
+ * the two `*_JSON_SCHEMA` constants directly; it switches on the family it
+ * is dispatching and lets this helper return the right document.
+ *
+ * Note: the returned schema is structural ONLY. The runtime-only Zod
+ * refinements (subsystem-delimiter guard, episodic invariant clamps,
+ * sensitivity-floor transform) are still enforced by the post-pass Zod parse
+ * in `atlas harvest write-fragment --stdin`, pre-write — see spec §4.1.1.
+ */
+export function jsonSchemaForFamily(family: SourceType): object {
+  return family === "episodic"
+    ? EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA
+    : CANDIDATE_FRAGMENT_JSON_SCHEMA;
+}

From dde4718f6560e599892722e340b22ab856dfe665 Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Fri, 12 Jun 2026 11:36:52 -0700
Subject: [PATCH 4/6] Add atlas harvest write-fragment --stdin CLI with
 0/1/2/3/4 exit-code matrix

---
 .../atlas-write-fragment-cli.test.ts          | 629 ++++++++++++++++++
 src/atlas/harvest-cli.ts                      | 310 ++++++++-
 2 files changed, 938 insertions(+), 1 deletion(-)
 create mode 100644 src/__tests__/atlas-write-fragment-cli.test.ts

diff --git a/src/__tests__/atlas-write-fragment-cli.test.ts b/src/__tests__/atlas-write-fragment-cli.test.ts
new file mode 100644
index 0000000..9d57893
--- /dev/null
+++ b/src/__tests__/atlas-write-fragment-cli.test.ts
@@ -0,0 +1,629 @@
+// atlas harvest write-fragment --stdin CLI integration tests (spec §4.2.1, T8a-e + T11).
+//
+// Invokes the BUILT CLI as a subprocess via `node dist/atlas-cli.js harvest
+// write-fragment ...`, feeds it stdin, and asserts the exit-code matrix
+// 0/1/2/3/4 plus side effects:
+//   T8a — exit 0 with explicit --stem: file lands at the expected path.
+//   T8b — exit 0 with derived stem (no --stem): file lands at the
+//         canonical-key-derived stem path.
+//   T8c — exit 1 on bad stdin JSON: stderr names the JSON parse failure.
+//   T8d — exit 3 on base-schema failure (missing required field).
+//   T8e — exit 4 on episodic invariant failure (needsReview=false).
+//   T11 — exit 2 on stem collision (second write to the same stem).
+//
+// Each test runs inside its own tempdir so concurrent test execution does not
+// cross-pollute fragments. The dist build is assumed already done by
+// `npm run build` (test suite's standard prerequisite); a fast guard at the top
+// fails loud if dist/atlas-cli.js is missing. The guard checks existence only,
+// so a stale build artifact is NOT detected — rebuild before running.
+
+import { spawnSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
+import { z } from "zod";
+
+import { claimSlug } from "../atlas/canonicalize.js";
+import { isEpisodicInvariantIssue } from "../atlas/harvest-cli.js";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const REPO_ROOT = path.resolve(__dirname, "..", "..");
+const CLI_PATH = path.join(REPO_ROOT, "dist", "atlas-cli.js");
+
+// A baseline CandidateFragment that passes CandidateFragmentSchema. Tests
+// shallow-clone + mutate to produce schema-failing / episodic-failing inputs.
+function baseFragment(overrides: Record<string, unknown> = {}): unknown {
+  return {
+    sourcetype: "github-pr",
+    subsystem: "cpk-runtime",
+    claimSlugHint: "explicit-hint-wins",
+    source_name: "github-pr",
+    repo_url: "https://github.com/CopilotKit/CopilotKit",
+    ref: "main",
+    title: "Some distilled claim about the runtime",
+    content: "why/how prose",
+    provenance: {
+      source: "github-pr",
+      date: "2026-06-08",
+      classification: {
+        sensitivity: "internal",
+        knowledge_type: "architecture",
+        audience: "all-staff",
+        validation_status: "source-verified",
+        confidence: "high",
+        provenance_class: "primary",
+        freshness: { as_of: "2026-06-08" },
+      },
+    },
+    evidence: [],
+    needsReview: false,
+    validationTargets: [],
+    ...overrides,
+  };
+}
+
+// Run the CLI with the provided stdin and argv tail. Returns the raw spawn
+// result so each test can assert exit code + stderr/stdout shape.
+function runCli(args: string[], stdin: string) {
+  return spawnSync("node", [CLI_PATH, "harvest", "write-fragment", ...args], {
+    input: stdin,
+    encoding: "utf-8",
+  });
+}
+
+describe("atlas harvest write-fragment --stdin CLI (spec §4.2 / T8 + T11)", () => {
+  let runsDir: string;
+  const runId = "test-run";
+
+  beforeAll(() => {
+    // Fail loud if the dist build is missing — running these tests against an
+    // absent build artifact would be a silent green-on-nothing pass.
+    if (!fs.existsSync(CLI_PATH)) {
+      throw new Error(
+        `dist build is missing (${CLI_PATH}); run \`npm run build\` first`,
+      );
+    }
+  });
+
+  beforeEach(() => {
+    runsDir = fs.mkdtempSync(path.join(os.tmpdir(), "atlas-wf-"));
+  });
+
+  afterEach(() => {
+    try {
+      fs.rmSync(runsDir, { recursive: true, force: true });
+    } catch {
+      // Tempdir cleanup is best-effort; OS tempdir reaper handles leftovers.
+    }
+  });
+
+  // T8a — explicit --stem, valid input → exit 0, file present at expected path.
+  it("T8a: exits 0 and writes the fragment when --stem is explicit", () => {
+    const stem = "explicit-stem";
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", stem],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(0);
+    const expected = path.join(runsDir, runId, "fragments", `${stem}.json`);
+    expect(fs.existsSync(expected)).toBe(true);
+    // stdout reports the absolute path the file was written to.
+    expect(result.stdout.trim()).toBe(path.resolve(expected));
+    const written = JSON.parse(fs.readFileSync(expected, "utf-8"));
+    expect(written.sourcetype).toBe("github-pr");
+    expect(written.subsystem).toBe("cpk-runtime");
+  });
+
+  // T8b — no --stem, valid input → exit 0, file present at claimSlug-derived path.
+  it("T8b: exits 0 and derives the stem from canonical-key components when --stem is omitted", () => {
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(0);
+    // The derived stem is claimSlug("<sourcetype>:<subsystem>:<claim-slug>"), where
+    // the inner claim-slug comes from claimSlugHint (preferred) or title.
+    const inner = claimSlug("explicit-hint-wins");
+    const expectedStem = claimSlug(`github-pr:cpk-runtime:${inner}`);
+    const expected = path.join(
+      runsDir,
+      runId,
+      "fragments",
+      `${expectedStem}.json`,
+    );
+    expect(fs.existsSync(expected)).toBe(true);
+    expect(result.stdout.trim()).toBe(path.resolve(expected));
+  });
+
+  // T8c — non-JSON stdin → exit 1, stderr mentions JSON.
+  it("T8c: exits 1 on un-parseable stdin", () => {
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "bad-json"],
+      "not-json{",
+    );
+    expect(result.status).toBe(1);
+    expect(result.stderr.toLowerCase()).toMatch(/json/);
+    // The fragments dir for this run should not have been created on a bail
+    // BEFORE the schema step.
+    const fragsDir = path.join(runsDir, runId, "fragments");
+    expect(fs.existsSync(fragsDir)).toBe(false);
+  });
+
+  // T8d — valid JSON, missing required field (no `content`) → exit 3.
+  it("T8d: exits 3 when the input fails CandidateFragmentSchema", () => {
+    const bad = baseFragment();
+    delete (bad as { content?: unknown }).content;
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "schema-bad"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema|content/);
+    const expected = path.join(runsDir, runId, "fragments", "schema-bad.json");
+    expect(fs.existsSync(expected)).toBe(false);
+  });
+
+  // T8e — episodic input with needsReview=false → exit 4 (episodic invariant).
+  it("T8e: exits 4 on an episodic invariant violation (needsReview=false)", () => {
+    // An episodic fragment that satisfies the BASE schema but violates the
+    // episodic refinements: needsReview must be true, provenance_class must be
+    // "derived", confidence must be "low", validation_status must be
+    // "unverified". We flip needsReview only — the rest are episodic-shaped
+    // already — so the first failing invariant is needsReview.
+    const episodic = baseFragment({
+      sourcetype: "episodic",
+      needsReview: false, // ← the failing invariant
+      provenance: {
+        source: "episodic",
+        date: "2026-06-08",
+        classification: {
+          sensitivity: "internal",
+          knowledge_type: "architecture",
+          audience: "all-staff",
+          validation_status: "unverified",
+          confidence: "low",
+          provenance_class: "derived",
+          freshness: { as_of: "2026-06-08" },
+        },
+      },
+    });
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "episodic-bad"],
+      JSON.stringify(episodic),
+    );
+    expect(result.status).toBe(4);
+    expect(result.stderr.toLowerCase()).toMatch(/needsreview|episodic/);
+    const expected = path.join(
+      runsDir,
+      runId,
+      "fragments",
+      "episodic-bad.json",
+    );
+    expect(fs.existsSync(expected)).toBe(false);
+  });
+
+  // T11 — second write to same stem → exit 2 with EEXIST-style error.
+  it("T11: exits 2 on stem collision (second write to the same stem)", () => {
+    const args = [
+      "--run-id",
+      runId,
+      "--runs-dir",
+      runsDir,
+      "--stem",
+      "collide-me",
+    ];
+    const first = runCli(args, JSON.stringify(baseFragment()));
+    expect(first.status).toBe(0);
+    const second = runCli(args, JSON.stringify(baseFragment()));
+    expect(second.status).toBe(2);
+    // Tighten beyond `/already exists/i`: kernel mkdir-EEXIST text ALSO contains
+    // "file already exists", so a regression that re-collapses the mkdir+write
+    // try-blocks would silently pass with the loose regex. Pin to OUR exit-2
+    // wording — `${stem}.json already exists at ${filePath}` — which the kernel
+    // EEXIST string does NOT emit.
+    expect(second.stderr).toMatch(/\.json already exists at /);
+  });
+
+  // T-1 boundary tests: episodic + base-schema (invalid_type / invalid_enum_value)
+  // failures must route to exit 3, not exit 4. The exit-4 lane is reserved for
+  // refinement (code: "custom") issues from EpisodicCandidateFragmentSchema's
+  // `.refine(...)` calls. A wrong-typed `needsReview` (string instead of bool)
+  // surfaces as `invalid_type` from the BASE schema and is a schema-validation
+  // failure, NOT an episodic invariant violation.
+
+  // Build an episodic fragment that satisfies the four refinements (so the only
+  // failure surfaced is the caller-injected base-schema breakage).
+  function baseEpisodicFragment(
+    overrides: Record<string, unknown> = {},
+  ): Record<string, unknown> {
+    return {
+      sourcetype: "episodic",
+      subsystem: "cpk-runtime",
+      claimSlugHint: "episodic-claim",
+      source_name: "episodic",
+      repo_url: "https://github.com/CopilotKit/CopilotKit",
+      ref: "main",
+      title: "An episodic observation",
+      content: "why/how prose",
+      provenance: {
+        source: "episodic",
+        date: "2026-06-08",
+        classification: {
+          sensitivity: "internal",
+          knowledge_type: "architecture",
+          audience: "all-staff",
+          validation_status: "unverified",
+          confidence: "low",
+          provenance_class: "derived",
+          freshness: { as_of: "2026-06-08" },
+        },
+      },
+      evidence: [],
+      needsReview: true,
+      validationTargets: [],
+      ...overrides,
+    };
+  }
+
+  // T-1.a — episodic + needsReview as a string → invalid_type → exit 3 (NOT 4).
+  it("T-1.a: exits 3 when episodic needsReview is a string (base-schema invalid_type)", () => {
+    const bad = baseEpisodicFragment({ needsReview: "true" });
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t1a"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema/);
+  });
+
+  // T-1.b — episodic + confidence as a number → invalid_type → exit 3 (NOT 4).
+  it("T-1.b: exits 3 when episodic confidence is a number (base-schema invalid_type)", () => {
+    const bad = baseEpisodicFragment();
+    (
+      bad.provenance as { classification: Record<string, unknown> }
+    ).classification.confidence = 5;
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t1b"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema/);
+  });
+
+  // T-1.c — episodic + confidence as a non-enum string → invalid_enum_value → exit 3.
+  it("T-1.c: exits 3 when episodic confidence is a non-enum string (base-schema invalid_enum_value)", () => {
+    const bad = baseEpisodicFragment();
+    (
+      bad.provenance as { classification: Record<string, unknown> }
+    ).classification.confidence = "made-up";
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t1c"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema/);
+  });
+
+  // T-1.d (positive) — episodic fragments that satisfy the base schema but
+  // fail exactly ONE of the four `.refine(...)` invariants must each route to
+  // exit 4 with stderr naming the offending field. Parametrized over all four
+  // invariants — needsReview, provenance_class, confidence, validation_status
+  // — so a future regression that shrinks EPISODIC_INVARIANT_FIELDS (e.g. back
+  // to needsReview-only) is caught. Each case mutates `baseEpisodicFragment()`
+  // (which satisfies all four refinements) along exactly ONE axis.
+  it.each([
+    {
+      field: "needsReview",
+      stem: "t1d-needsreview",
+      mutate: (frag: Record<string, unknown>) => {
+        frag.needsReview = false;
+      },
+    },
+    {
+      field: "provenance_class",
+      stem: "t1d-provclass",
+      mutate: (frag: Record<string, unknown>) => {
+        (
+          frag.provenance as { classification: Record<string, unknown> }
+        ).classification.provenance_class = "primary";
+      },
+    },
+    {
+      field: "confidence",
+      stem: "t1d-confidence",
+      mutate: (frag: Record<string, unknown>) => {
+        (
+          frag.provenance as { classification: Record<string, unknown> }
+        ).classification.confidence = "high";
+      },
+    },
+    {
+      field: "validation_status",
+      stem: "t1d-valstatus",
+      mutate: (frag: Record<string, unknown>) => {
+        (
+          frag.provenance as { classification: Record<string, unknown> }
+        ).classification.validation_status = "source-verified";
+      },
+    },
+  ])(
+    "T-1.d: exits 4 when episodic violates the $field invariant (refinement custom-issue)",
+    ({ field, stem, mutate }) => {
+      const bad = baseEpisodicFragment();
+      mutate(bad);
+      const result = runCli(
+        ["--run-id", runId, "--runs-dir", runsDir, "--stem", stem],
+        JSON.stringify(bad),
+      );
+      expect(result.status).toBe(4);
+      // stderr must name the offending field — the ZodError's issue path
+      // includes the field name verbatim. Asserts that the gate genuinely
+      // covers THIS invariant (not just that exit 4 fired for some reason).
+      expect(result.stderr).toContain(field);
+      // Sanity: the "episodic invariant violation" label is always present.
+      expect(result.stderr.toLowerCase()).toContain("episodic invariant");
+    },
+  );
+
+  // M-2 — AND-case routing: a fragment that fails BOTH a base-schema
+  // constraint (confidence as number → invalid_type) AND an episodic
+  // refinement (needsReview=false → custom) must route to exit 3. Per
+  // spec §4.2.1, a base-schema failure means the fragment isn't valid
+  // CandidateFragment shape at all — the refinement verdict is moot, so
+  // exit 3 (base-schema) wins over exit 4 (refinement) in the AND case.
+  it("M-2: exits 3 when episodic input fails BOTH base-schema (confidence=number) AND a refinement (needsReview=false)", () => {
+    const bad = baseEpisodicFragment({ needsReview: false });
+    (
+      bad.provenance as { classification: Record<string, unknown> }
+    ).classification.confidence = 5;
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "m2"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/expected|invalid|schema/);
+  });
+
+  // M-4 — `--stdin` is accepted as a no-op flag for spec-literal
+  // invocation compatibility. The literal invocation in §4.2.1 reads
+  // `atlas harvest write-fragment --run-id <id> --fragment-id <stem>
+  // --stdin`, so the CLI must accept `--stdin` without erroring. stdin
+  // is always read regardless of the flag.
+  it("M-4: exits 0 when --stdin is passed as a no-op flag (spec-literal invocation)", () => {
+    const result = runCli(
+      [
+        "--run-id",
+        runId,
+        "--runs-dir",
+        runsDir,
+        "--stem",
+        "m4-stdin",
+        "--stdin",
+      ],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(0);
+    const expected = path.join(runsDir, runId, "fragments", "m4-stdin.json");
+    expect(fs.existsSync(expected)).toBe(true);
+  });
+
+  // T-5 — mkdir failure vs write EEXIST must be disambiguated. If the
+  // `<runs-dir>/<run-id>/fragments` PATH already exists as a regular file,
+  // mkdirSync({recursive:true}) raises EEXIST. That is NOT a stem collision;
+  // it is an operator-environment problem (exit 1). Only an EEXIST from the
+  // write step (file at the resolved stem path exists) is a stem collision.
+  it("T-5: exits 1 when the fragments dir path is occupied by a regular file (mkdir-class failure, NOT exit 2)", () => {
+    // Pre-create `<runsDir>/<runId>/fragments` as a file so mkdirSync trips
+    // EEXIST against a non-dir.
+    const runDir = path.join(runsDir, runId);
+    fs.mkdirSync(runDir, { recursive: true });
+    fs.writeFileSync(path.join(runDir, "fragments"), "occupied\n");
+
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t5"],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(1);
+    // Error is named as a mkdir-class failure (not a write-class one) and
+    // identifies the fragments directory path. The exit-2 message format is
+    // `${stem}.json already exists at ${filePath}` — assert that the stem
+    // wording is absent so a regression that re-collapses the two try-blocks
+    // (mis-routing mkdir-EEXIST as a stem collision) is caught.
+    expect(result.stderr.toLowerCase()).toMatch(/mkdir/);
+    expect(result.stderr.toLowerCase()).toMatch(/fragments/);
+    expect(result.stderr).not.toMatch(/\.json already exists at /);
+  });
+
+  // T-R4-4 — `--stem` value is interpolated into a filesystem path. Without a
+  // filesystem-safe regex gate, `--stem ../../evil` yields a write OUTSIDE the
+  // fragments directory. Per spec §4.2.1 exit-code matrix, this is the
+  // operator/input class — exit 1 with stem-validation error wording, BEFORE
+  // the mkdir/write attempt.
+  it("T-R4-4: exits 1 when --stem contains path-traversal characters (../, /, leading dot, etc.)", () => {
+    // Spec §4.2.1: filesystem-safe stems only. Operator/LLM-generated stems with
+    // `../`, `/`, leading-dot, or other path-traversal sequences must be rejected
+    // BEFORE the mkdir/write attempt to prevent writes outside <fragmentsDir>.
+    const traversalStem = "../../evil";
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", traversalStem],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(1);
+    expect(result.stderr.toLowerCase()).toMatch(/stem|invalid|traversal/);
+    // The error message should name the stem-validation failure, not a mkdir/write error.
+    expect(result.stderr).not.toMatch(/mkdir/);
+    expect(result.stderr).not.toMatch(/\.json already exists at /);
+  });
+
+  it("T-R4-4: accepts a filesystem-safe stem (alphanumeric + . _ -)", () => {
+    // Safe stem characters: A-Z a-z 0-9 . _ - (no path separators, no leading dot,
+    // no traversal sequences).
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "valid-stem.123_ok"],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(0);
+  });
+
+  // T-R5-2 — STEM_PATTERN negative-test coverage hardening. T-R4-4 covers ONE
+  // axis (leading-dot traversal). A regex weakening that admits `/` mid-string
+  // or other path-component shapes would not be caught by T-R4-4 alone. These
+  // three tests pin the additional STEM_PATTERN rejection axes so any future
+  // edit that broadens the character class produces a visible regression.
+  it("T-R5-2: exits 1 when --stem contains a mid-string path separator (foo/bar)", () => {
+    // Mid-string `/` is the most dangerous regex-weakening vector: a stem like
+    // `foo/bar` would write to `<fragmentsDir>/foo/bar.json` and could be
+    // chained with `..` to escape. STEM_PATTERN's `[A-Za-z0-9._-]` body class
+    // does NOT include `/`, so this must reject.
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "foo/bar"],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(1);
+    expect(result.stderr.toLowerCase()).toMatch(/stem|invalid/);
+    expect(result.stderr).not.toMatch(/mkdir/);
+  });
+
+  it("T-R5-2: exits 1 when --stem has a leading path separator (/absolute/path)", () => {
+    // A leading `/` would resolve `path.join(fragmentsDir, "/absolute/path.json")`
+    // to an absolute escape. STEM_PATTERN's leading-character anchor requires
+    // `[A-Za-z0-9]`, so this must reject.
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "/absolute/path"],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(1);
+    expect(result.stderr.toLowerCase()).toMatch(/stem|invalid/);
+    expect(result.stderr).not.toMatch(/mkdir/);
+  });
+
+  it("T-R5-2: exits 1 when --stem has a leading double-dot (..foo)", () => {
+    // The leading-character anchor `[A-Za-z0-9]` excludes `.`, so a stem
+    // starting with `..` (the classic traversal prefix) is rejected by the
+    // leading-anchor — independently of any body-position `..` permissiveness.
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "..foo"],
+      JSON.stringify(baseFragment()),
+    );
+    expect(result.status).toBe(1);
+    expect(result.stderr.toLowerCase()).toMatch(/stem|invalid/);
+    expect(result.stderr).not.toMatch(/mkdir/);
+  });
+});
+
+// Direct unit tests on the `isEpisodicInvariantIssue` predicate. These exist
+// because the AND-case precedence rule (any non-custom issue downgrades the
+// whole ZodError to exit 3) is not exercisable through the CLI integration
+// path: Zod's base-parse short-circuits on `invalid_type` BEFORE refinements
+// run, so a real fragment can never produce a mixed-code ZodError via the
+// episodic-parse codepath. The predicate, however, is a defensive guard for
+// the spec contract (§4.2.1) and any future code that COULD pass a mixed
+// ZodError (e.g. a custom parse path that runs base + refinements together).
+// The mixed-code (custom + invalid_type) test is the regression-armor for that contract.
+describe("isEpisodicInvariantIssue: AND-case precedence direct unit tests", () => {
+  it("returns FALSE for a pure base-schema failure (invalid_type only)", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.invalid_type,
+        expected: "boolean",
+        received: "string",
+        path: ["needsReview"],
+        message: "Expected boolean, received string",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(false);
+  });
+
+  it("returns TRUE for a pure refinement failure (custom only, path matches EPISODIC_INVARIANT_FIELDS)", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.custom,
+        path: ["needsReview"],
+        message: "needsReview must be true when validation_status is pending",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(true);
+  });
+
+  it("returns FALSE for a mixed-code ZodError (custom + invalid_type) — base-schema wins per §4.2.1 precedence", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.custom,
+        path: ["needsReview"],
+        message: "needsReview must be true when validation_status is pending",
+      },
+      {
+        code: z.ZodIssueCode.invalid_type,
+        expected: "number",
+        received: "string",
+        path: ["confidence"],
+        message: "Expected number, received string",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(false);
+  });
+
+  it("returns FALSE for a custom issue whose path-last is NOT in EPISODIC_INVARIANT_FIELDS", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.custom,
+        path: ["title"],
+        message: "title must not contain a subsystem delimiter",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(false);
+  });
+
+  // Positive per-invariant coverage. The pure-custom case above pins
+  // needsReview; the next three pin the remaining three EPISODIC_INVARIANT_FIELDS
+  // (provenance_class, confidence, validation_status). Path shapes mirror the
+  // refines on EpisodicCandidateFragmentSchema in src/atlas/types.ts — the three
+  // classification-nested refines emit FULL nested paths
+  // (["provenance","classification",<field>]), while needsReview uses the
+  // single-element path. The predicate matches on path[path.length-1], so the
+  // leaf form would also suffice; using the actual refine path-shape keeps
+  // these tests faithful to what production ZodErrors look like.
+  it("returns TRUE for a pure custom failure on provenance_class invariant (nested path)", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.custom,
+        path: ["provenance", "classification", "provenance_class"],
+        message: "episodic requires provenance_class=derived",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(true);
+  });
+
+  it("returns TRUE for a pure custom failure on confidence invariant (nested path)", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.custom,
+        path: ["provenance", "classification", "confidence"],
+        message: "episodic requires confidence=low (clamped)",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(true);
+  });
+
+  it("returns TRUE for a pure custom failure on validation_status invariant (nested path)", () => {
+    const err = new z.ZodError([
+      {
+        code: z.ZodIssueCode.custom,
+        path: ["provenance", "classification", "validation_status"],
+        message: "episodic requires validation_status=unverified",
+      },
+    ]);
+    expect(isEpisodicInvariantIssue(err)).toBe(true);
+  });
+
+  // Defensive edge case: an empty-issues ZodError. The predicate's
+  // `issues.length === 0` early-return must hold — an empty ZodError carries
+  // no invariant signal and must NOT route to exit 4. Routes to exit 3 (base
+  // lane) by default per §4.2.1.
+  it("returns FALSE for an empty-issues ZodError (defensive edge case)", () => {
+    const err = new z.ZodError([]);
+    expect(isEpisodicInvariantIssue(err)).toBe(false);
+  });
+});
diff --git a/src/atlas/harvest-cli.ts b/src/atlas/harvest-cli.ts
index aaa7bdf..7cc9d8f 100644
--- a/src/atlas/harvest-cli.ts
+++ b/src/atlas/harvest-cli.ts
@@ -38,6 +38,13 @@ import { fileURLToPath } from "node:url";
 import { Command, CommanderError, Option } from "commander";
 import { Client } from "@notionhq/client";
 
+// ── Schemas (write-fragment subcommand) ────────────────────────────────────────
+import {
+  CandidateFragmentSchema,
+  EpisodicCandidateFragmentSchema,
+} from "./types.js";
+import { claimSlug } from "./canonicalize.js";
+
 // ── The seven leaf adapters — imported HERE and nowhere else (assembly point) ──
 import { memoryAdapter } from "./adapters/memory.js";
 import { githubAdapter } from "./adapters/github.js";
@@ -619,6 +626,266 @@ async function reindexCommand(
   );
 }
 
+// ── write-fragment subcommand (spec §4.2) ──────────────────────────────────────
+//
+// Read a single CandidateFragment JSON object from stdin, validate it against
+// the appropriate family schema (`CandidateFragmentSchema` for non-episodic,
+// `EpisodicCandidateFragmentSchema` for episodic — the episodic schema layers
+// the four episodic-invariant refinements on top of the base), and write the
+// validated (and possibly sensitivity-coerced) fragment EXCLUSIVELY to
+// `<runs-dir>/<run-id>/fragments/<stem>.json`.
+//
+// `--stem` is OPTIONAL: when omitted, the stem is derived from the fragment's
+// canonical-key components (`claimSlug(<sourcetype>:<subsystem>:claimSlug(claimSlugHint || title))`)
+// so two fragments with the same claim text but different sourcetype/subsystem
+// don't collide. The derived stem is itself idempotent across the canonicalize
+// path (claimSlug normalizes case/punctuation).
+//
+// Exit-code matrix (spec §4.2.1):
+//   0 — success (fragment written; absolute path printed to stdout)
+//   1 — stdin/IO failure (missing --run-id/--runs-dir, unreadable stdin, bad JSON, invalid stem, mkdir failure, write error other than EEXIST)
+//   2 — stem collision (file already exists; exclusive-create fails with EEXIST)
+//   3 — schema validation failure (base CandidateFragmentSchema rejected the input,
+//       OR an episodic input whose Zod error path is NOT one of the four episodic
+//       invariants — i.e. a base-schema failure surfaced through the episodic parse)
+//   4 — episodic invariant violation (sourcetype === "episodic" AND the Zod error
+//       path identifies one of the four episodic invariants: needsReview,
+//       provenance_class, confidence, validation_status)
+//
+// The fail-loud rule: stderr always carries the underlying error message; the
+// exit code distinguishes the FAILURE CLASS so the caller (leaf adapter, CI
+// gate) can route accordingly.
+
+const EPISODIC_INVARIANT_FIELDS = new Set([
+  "needsReview", // leaf names only — matched against the LAST segment of a Zod issue path
+  "provenance_class", // e.g. path ["provenance", "classification", "provenance_class"]
+  "confidence",
+  "validation_status",
+]);
+
+interface WriteFragmentCliOptions {
+  runId?: string; // --run-id; writeFragmentCommand returns 1 when missing
+  runsDir?: string; // --runs-dir; writeFragmentCommand returns 1 when missing
+  stem?: string; // --stem; derived from the fragment when undefined or ""
+}
+
+// Drain process.stdin to EOF and return everything read as one utf-8 string.
+// Bounded only by available memory — fragments are small (a few KB each) so a
+// full read is fine; streaming-parse would add complexity for zero benefit.
+async function readAllStdin(): Promise<string> {
+  const chunks: Buffer[] = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
+  }
+  return Buffer.concat(chunks).toString("utf-8");
+}
+
+// Inspect a ZodError's issues and decide whether the parse failure is purely
+// an episodic-invariant refinement violation (exit 4) versus a base-schema
+// failure that surfaced through the episodic parse (exit 3). The episodic
+// schema's refinement paths are authored explicitly (see
+// EpisodicCandidateFragmentSchema in types.ts); the four `.refine(...)` calls
+// all emit Zod issues with `code: "custom"` (the default for refinements).
+//
+// Routing rules (per spec §4.2.1):
+//   - Per-issue gate: only `code: "custom"` issues whose path-last lands on
+//     one of EPISODIC_INVARIANT_FIELDS are candidates for exit 4. invalid_type
+//     / invalid_enum_value / invalid_literal / unrecognized_keys etc. are
+//     base-schema issues and route to exit 3 even when they land on a
+//     refinement-named field.
+//   - AND-case precedence: if ANY issue in the same ZodError is a non-custom
+//     base-schema issue, the fragment isn't even valid CandidateFragment
+//     shape, so the refinement verdict is moot — route to exit 3. Exit 3
+//     ALWAYS wins over exit 4 when both apply.
+//
+// Exported for direct unit-testing of the AND-case precedence predicate;
+// production callers reach it through the write-fragment command body below.
+export function isEpisodicInvariantIssue(
+  error: unknown,
+): error is { issues: Array<{ path: (string | number)[]; message: string }> } {
+  if (!error || typeof error !== "object") return false;
+  const issues = (error as { issues?: unknown }).issues;
+  if (!Array.isArray(issues) || issues.length === 0) return false;
+  // AND-case precedence: any non-custom issue downgrades the whole ZodError
+  // to exit 3. A base-schema failure (invalid_type / invalid_enum_value /
+  // invalid_literal / unrecognized_keys / etc.) means the fragment isn't a
+  // valid CandidateFragment at all — the episodic-refinement verdict is moot.
+  if (issues.some((issue) => (issue as { code?: unknown }).code !== "custom")) {
+    return false;
+  }
+  // All issues are `code: "custom"`. Return true when AT LEAST ONE has a
+  // path-last in EPISODIC_INVARIANT_FIELDS; if none does (e.g. only the
+  // subsystem-delimiter refine on the base schema fired) return false → exit 3.
+  // NOTE(review): a MIX of invariant and non-invariant custom issues returns
+  // true (exit 4) — confirm that is intended given "exit 3 ALWAYS wins".
+  return issues.some((issue) => {
+    const path = (issue as { path?: (string | number)[] }).path;
+    if (!Array.isArray(path) || path.length === 0) return false;
+    const last = path[path.length - 1];
+    return typeof last === "string" && EPISODIC_INVARIANT_FIELDS.has(last);
+  });
+}
+
+// The write-fragment command body. Returns the exit code per §4.2.1; option,
+// stdin, JSON, schema, stem, mkdir and write failures all map to exit codes.
+export async function writeFragmentCommand(
+  options: WriteFragmentCliOptions,
+  writeOut: WriteFn,
+  writeErr: WriteFn,
+  stdinReader: () => Promise<string> = readAllStdin,
+): Promise<number> {
+  if (!options.runId) {
+    writeErr("atlas-harvest write-fragment: --run-id is required\n");
+    return 1;
+  }
+  if (!options.runsDir) {
+    writeErr("atlas-harvest write-fragment: --runs-dir is required\n");
+    return 1;
+  }
+
+  // 1. Read + JSON-parse stdin. Both stdin IO and JSON parse failures are
+  //    exit 1 (stdin/IO class).
+  let raw: string;
+  try {
+    raw = await stdinReader();
+  } catch (err) {
+    writeErr(
+      `atlas-harvest write-fragment: stdin read failed: ${formatCliError(err)}\n`,
+    );
+    return 1;
+  }
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (err) {
+    writeErr(
+      `atlas-harvest write-fragment: stdin JSON parse failed: ${formatCliError(err)}\n`,
+    );
+    return 1;
+  }
+
+  // 2. Pick schema family by the fragment's `sourcetype` field. Inspect
+  //    BEFORE parsing — we need the family to decide which schema to run and
+  //    which exit-code class (3 vs 4) a failure maps to.
+  const sourcetype =
+    parsed && typeof parsed === "object"
+      ? (parsed as { sourcetype?: unknown }).sourcetype
+      : undefined;
+  const isEpisodic = sourcetype === "episodic";
+  const schema = isEpisodic
+    ? EpisodicCandidateFragmentSchema
+    : CandidateFragmentSchema;
+
+  // 3. Parse against the chosen schema. On failure:
+  //      - non-episodic OR an episodic base-schema failure → exit 3
+  //      - episodic invariant refinement failure → exit 4
+  const result = schema.safeParse(parsed);
+  if (!result.success) {
+    const exitCode =
+      isEpisodic && isEpisodicInvariantIssue(result.error) ? 4 : 3;
+    const label =
+      exitCode === 4
+        ? "episodic invariant violation"
+        : "schema validation failure";
+    writeErr(
+      `atlas-harvest write-fragment: ${label}: ${formatCliError(result.error)}\n`,
+    );
+    return exitCode;
+  }
+  const fragment = result.data as { sourcetype: string; subsystem: string };
+
+  // 4. Resolve the stem — a non-empty explicit `--stem` wins; otherwise derive
+  //    it from the fragment's canonical-key components as
+  //    `claimSlug(<sourcetype>:<subsystem>:claimSlug(claimSlugHint || title))`.
+  //    Either way the result must still pass the STEM_PATTERN gate in 4a.
+  let stem: string;
+  if (options.stem !== undefined && options.stem !== "") {
+    stem = options.stem;
+  } else {
+    const fragWithClaim = result.data as {
+      sourcetype: string;
+      subsystem: string;
+      claimSlugHint?: string;
+      title: string;
+    };
+    const claim = claimSlug(fragWithClaim.claimSlugHint || fragWithClaim.title);
+    stem = claimSlug(
+      `${fragWithClaim.sourcetype}:${fragWithClaim.subsystem}:${claim}`,
+    );
+  }
+
+  // 4a. Filesystem-safe stem gate (spec §4.2.1, T-R4-4, T-R5-2). `--stem`
+  //     flows into `path.join(fragmentsDir, ...)` and an unvalidated value
+  //     like `../../evil` writes OUTSIDE the fragments directory. The
+  //     `STEM_PATTERN` regex below enforces:
+  //       - First character must be alphanumeric `[A-Za-z0-9]`. This blocks
+  //         leading-dot hidden-file values (`.hidden`), leading-dash
+  //         flag-confusable values (`-flag`), AND any leading-`..` traversal
+  //         prefix (because `.` is not in the leading char class).
+  //       - Subsequent characters limited to `[A-Za-z0-9._-]`. Any path
+  //         separator (`/`, `\`) is rejected because it's outside the body
+  //         class — so a stem cannot construct a multi-component path at all.
+  //     Note: a substring `..` is permitted in the body (e.g. `foo..bar`),
+  //     but is operationally safe — with no `/` separator available, it
+  //     cannot construct a traversal sequence to escape `fragmentsDir`.
+  //     This is the operator/input class — exit 1, BEFORE the mkdir/write
+  //     attempt.
+  const STEM_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
+  if (!STEM_PATTERN.test(stem)) {
+    writeErr(
+      `atlas-harvest write-fragment: invalid stem "${stem}" — must match ${STEM_PATTERN}\n`,
+    );
+    return 1;
+  }
+
+  // 5. Write EXCLUSIVELY under `<runs-dir>/<run-id>/fragments/<stem>.json`.
+  //    The mkdir step and the write step are intentionally NOT collapsed into
+  //    one try/catch — they have DIFFERENT exit-code classes:
+  //
+  //      - mkdir failure (EEXIST against a non-dir path, EACCES, ENOSPC, ...)
+  //        is an operator-environment problem and routes to exit 1.
+  //      - writeFileSync EEXIST (file at the resolved stem path already
+  //        exists) is the spec-intended "stem collision" case and routes to
+  //        exit 2.
+  //      - Any other writeFileSync failure (EACCES, ENOSPC, ...) is also
+  //        exit 1.
+  //
+  //    Collapsing them would mis-route mkdir-EEXIST to exit 2 and mis-label
+  //    mkdir-class IO errors as "write failed" (wrong syscall name).
+  const fragmentsDir = path.join(options.runsDir, options.runId, "fragments");
+  const filePath = path.join(fragmentsDir, `${stem}.json`);
+  try {
+    fs.mkdirSync(fragmentsDir, { recursive: true });
+  } catch (err) {
+    writeErr(
+      `atlas-harvest write-fragment: mkdir failed for fragments dir ${fragmentsDir}: ${formatCliError(err)}\n`,
+    );
+    return 1;
+  }
+  try {
+    fs.writeFileSync(filePath, `${JSON.stringify(result.data, null, 2)}\n`, {
+      encoding: "utf-8",
+      flag: "wx",
+    });
+  } catch (err) {
+    if ((err as NodeJS.ErrnoException).code === "EEXIST") {
+      writeErr(
+        `atlas-harvest write-fragment: ${stem}.json already exists at ${filePath}\n`,
+      );
+      return 2;
+    }
+    writeErr(
+      `atlas-harvest write-fragment: write failed for ${filePath}: ${formatCliError(err)}\n`,
+    );
+    return 1;
+  }
+
+  writeOut(`${path.resolve(filePath)}\n`);
+  // `fragment` is never read after its declaration; `void` it to silence the unused-variable lint.
+  void fragment;
+  return 0;
+}
+
 // Format a CLI error for stderr, walking the `{cause}` chain (bounded depth).
 // Several pipeline failures deliberately attach the underlying error as
 // `cause` — e.g. rag-dedup's consecutive-probe fail-fast wraps the ACTUAL
@@ -756,9 +1023,50 @@ export async function runAtlasHarvestCli(
       await reindexCommand(options, writeOut);
     });
 
+  // The write-fragment subcommand has its OWN exit-code matrix (§4.2.1: 0/1/2/3/4)
+  // that the standard commander error path cannot express. The action closes over
+  // this slot and the outer return picks it up.
+  let writeFragmentExitCode: number | undefined;
+
+  program
+    .command("write-fragment")
+    .description(
+      "Read a CandidateFragment from stdin and write it under " +
+        "<runs-dir>/<run-id>/fragments/<stem>.json. When --stem is omitted, " +
+        "the stem is derived as " +
+        "claimSlug(<sourcetype>:<subsystem>:claimSlug(claimSlugHint || title)). " +
+        "Exit codes per spec §4.2.1: 0 ok, 1 stdin/IO, 2 stem collision, " +
+        "3 schema, 4 episodic invariant.",
+    )
+    .requiredOption(
+      "--run-id <id>",
+      "Run id under which the fragment is written",
+    )
+    .requiredOption(
+      "--runs-dir <dir>",
+      "Root directory of run corpora (e.g. ./runs)",
+    )
+    .option(
+      "--stem <stem>",
+      "Filesystem-safe fragment stem; if omitted, derived as " +
+        "claimSlug(<sourcetype>:<subsystem>:claimSlug(claimSlugHint || title))",
+    )
+    .option(
+      "--stdin",
+      "Read fragment from stdin (no-op; stdin is always read — accepted for " +
+        "spec-literal invocation compatibility, see §4.2.1)",
+    )
+    .action(async (options: WriteFragmentCliOptions) => {
+      writeFragmentExitCode = await writeFragmentCommand(
+        options,
+        writeOut,
+        writeErr,
+      );
+    });
+
   try {
     await program.parseAsync(argv, { from: "user" });
-    return 0;
+    return writeFragmentExitCode ?? 0;
   } catch (error) {
     if (error instanceof CommanderError) {
       return error.exitCode;

From 9c1dd5816b018c73c2c4563db639fbac9ffe98e5 Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Fri, 12 Jun 2026 11:36:52 -0700
Subject: [PATCH 5/6] Add defect-regression corpus, Zod refinement coverage
 doc, and runDualRun shadow-gate scaffold

---
 docs/atlas/refinement-coverage.md             |  64 +++
 src/__tests__/atlas-defect-regression.test.ts | 387 ++++++++++++++++
 src/__tests__/atlas-dual-run.test.ts          | 424 ++++++++++++++++++
 .../atlas-refinement-coverage.test.ts         | 138 ++++++
 src/atlas/dual-run.ts                         | 315 +++++++++++++
 5 files changed, 1328 insertions(+)
 create mode 100644 docs/atlas/refinement-coverage.md
 create mode 100644 src/__tests__/atlas-defect-regression.test.ts
 create mode 100644 src/__tests__/atlas-dual-run.test.ts
 create mode 100644 src/__tests__/atlas-refinement-coverage.test.ts
 create mode 100644 src/atlas/dual-run.ts

diff --git a/docs/atlas/refinement-coverage.md b/docs/atlas/refinement-coverage.md
new file mode 100644
index 0000000..36d5065
--- /dev/null
+++ b/docs/atlas/refinement-coverage.md
@@ -0,0 +1,64 @@
+---
+title: Atlas Zod refinement coverage
+status: living
+source: src/atlas/types.ts
+generated: 2026-06-12
+---
+
+# Atlas Zod refinement coverage
+
+This document enumerates every Zod refinement and transform currently in
+`src/atlas/types.ts` (the foundational Atlas contract). For each, it
+records whether the constraint is **JSON-Schema-expressible** (and therefore
+survives `zod-to-json-schema` conversion at orchestrator-shell boot) or
+whether it **requires a post-pass** Zod parse after JSON Schema validation
+(because it is a runtime predicate / transform that `zod-to-json-schema`
+silently drops).
+
+This file is paired with test `src/__tests__/atlas-refinement-coverage.test.ts`
+(T9 per spec §7.9). The test asserts that the refinement count in this doc matches
+the number of refinements found in source — so if you add a new `.refine(...)` /
+`.superRefine(...)` / `.transform(...)` to `src/atlas/types.ts`, you MUST
+add a corresponding row here, otherwise T9 fails with a stale-doc message.
+
+## Refinement table
+
+| Refinement                                                              | Schema                                        | JSON-Schema-expressible?                                                   | Post-pass note                                                                                                                                                                                                                                                                                                                                                                                                      |
+| ----------------------------------------------------------------------- | --------------------------------------------- | -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `subsystemHasNoDelimiter` (fragment)                                    | `CandidateFragmentSchema` (line ~145)         | No (runtime predicate over a string body)                                  | Rejects when `subsystem` contains `:`, `⟦`, or `⟧`. JSON Schema cannot express a predicate over unicode delimiters as a portable `pattern`. Enforced by the CLI helper's post-pass `CandidateFragmentSchema.parse(input)` step (see spec §4.2.1, STEP 2).                                                                                                                                                           |
+| `subsystemHasNoDelimiter` (finalized candidate)                         | `CandidateSchema` (line ~207)                 | No (runtime predicate)                                                     | Same predicate as the fragment row above, applied to the finalized Tier-3 `Candidate` after canonicalization. JSON Schema is not the validation surface for finalized rows — they are validated in TS by `CandidateSchema.parse(...)` — so this lives purely in Zod.                                                                                                                                                |
+| `episodic.needsReview === true`                                         | `EpisodicCandidateFragmentSchema` (line ~166) | No (semantic invariant, not structural)                                    | Rejects when `needsReview !== true`. Episodic leaves are "guilty until validated" — the per-family invariant cannot be expressed as a JSON Schema `const` on a `boolean` because the base `CandidateFragmentSchema` permits both values; only the episodic narrowing forbids `false`. Enforced as a SECOND parse via `EpisodicCandidateFragmentSchema` when `sourcetype === "episodic"` (spec §4.6).                |
+| `episodic.provenance.classification.provenance_class === "derived"`     | `EpisodicCandidateFragmentSchema` (line ~173) | No (semantic invariant)                                                    | Rejects when `provenance_class !== "derived"`. Episodic leaves can never be `"primary"`. Enforced post-pass via the episodic-narrowed schema.                                                                                                                                                                                                                                                                       |
+| `episodic.provenance.classification.confidence === "low"`               | `EpisodicCandidateFragmentSchema` (line ~177) | No (semantic invariant)                                                    | Rejects when `confidence !== "low"`. Episodic confidence is clamped to `"low"` by policy. Enforced post-pass via the episodic-narrowed schema.                                                                                                                                                                                                                                                                      |
+| `episodic.provenance.classification.validation_status === "unverified"` | `EpisodicCandidateFragmentSchema` (line ~181) | No (semantic invariant)                                                    | Rejects when `validation_status !== "unverified"`. Episodic claims are unverified by construction. Enforced post-pass via the episodic-narrowed schema.                                                                                                                                                                                                                                                             |
+| `episodic sensitivity floor` (transform)                                | `EpisodicCandidateFragmentSchema` (line ~190) | No (`.transform` mutates the parsed value; not expressible in JSON Schema) | Coerces `sensitivity === "public"` upward to `"internal"`; `"internal"` / `"proprietary"` / `"secret"` are preserved verbatim. This is a "coerce up to floor" rewrite, NOT a "reject below floor" predicate, so even the JSON-Schema `enum` shape would not catch it (the input is allowed; the value just gets rewritten before persistence). Enforced post-pass via `EpisodicCandidateFragmentSchema.parse(...)`. |
+
+## Summary
+
+- Total refinements / transforms in `src/atlas/types.ts`: **7**
+- JSON-Schema-expressible: **0**
+- Post-pass required: **7**
+
+All seven entries are runtime predicates or transforms; none survive
+`zod-to-json-schema` conversion. The CLI helper at `atlas harvest
+write-fragment --stdin` therefore re-parses every fragment through Zod:
+`CandidateFragmentSchema` for non-episodic fragments, or `EpisodicCandidateFragmentSchema`
+(which layers the episodic rows on top of the base) when `sourcetype === "episodic"`.
+The `CandidateSchema` row is enforced separately, by `CandidateSchema.parse(...)` on finalized
+candidates. See spec §4.1.1, §4.2.1, and §4.6 for the full orchestration-shell vs CLI-helper split.
+
+## Future-edit note
+
+If you add a `.refine(...)`, `.superRefine(...)`, `.transform(...)`, or
+`.regex(...)` to `src/atlas/types.ts`, you must:
+
+1. Add a row to the table above describing the constraint, the host
+   schema, whether it is JSON-Schema-expressible, and where it is
+   enforced.
+2. Update the **Summary** counts.
+3. Re-run `npx vitest run src/__tests__/atlas-refinement-coverage.test.ts`
+   and confirm green.
+
+T9 fails fast on count drift so the silent-drop class of bug (a new
+runtime predicate added to `types.ts` but never wired into the CLI
+post-pass) is caught at test time, not at first failing leaf.
diff --git a/src/__tests__/atlas-defect-regression.test.ts b/src/__tests__/atlas-defect-regression.test.ts
new file mode 100644
index 0000000..b476468
--- /dev/null
+++ b/src/__tests__/atlas-defect-regression.test.ts
@@ -0,0 +1,387 @@
+import { describe, test, expect } from "vitest";
+import { CandidateFragmentSchema } from "../atlas/types.js";
+
+// ── T4 + T5 + T7 — Defect-regression corpus (spec §1.1 / §7.4 / §7.7) ─────────
+//
+// This file codifies, as parametric regression tests, every defect class
+// observed in one full-monty run that the new schema-enforcement boundary
+// (CandidateFragmentSchema as the single I/O contract) must reject. It
+// REPLACES today's repair-shim's permissive acceptance: any defect that the
+// shim used to silently coerce is now a loud Zod rejection.
+//
+// The corpus comes in three blocks:
+//
+//   T4 — 14 alias names for `provenance.classification.knowledge_type`
+//        (`kind` / `category` / `discipline` / `topic` / `domain` / `area` /
+//         `type` / `facet` / `bucket` / `class` / `subject` / `theme` /
+//         `label` / `tag`). Each fixture is otherwise-valid but substitutes
+//        the alias key for `knowledge_type`. The Zod schema rejects because
+//        `knowledge_type` is required.
+//
+//   T5 — 12 other defect rows from §1.1 (rows 2–8 and 10–14; row 9 is
+//        DROPPED per plan SLOT-5 N1, because `audience` has a default and a
+//        fragment that ONLY omits it parses successfully under the current
+//        schema — the original observation reflects an older/lossy intake
+//        path, not a current-schema rejection).
+//
+//   T7 — Integration: a happy-path fragment parses, and then for EACH of
+//        the 26 defect fixtures above, swapping the defect into the happy
+//        path yields a rejection. This ties T4 + T5 into a single
+//        comprehensive regression assertion: the same base, the same
+//        per-defect mutator, the same rejection.
+//
+// Row 14 footnote: §1.1 row 14 is "extra/unknown top-level fields silently
+// dropped." Under the BASE (non-`.strict()`) CandidateFragmentSchema, extras
+// are stripped during parse — the parse SUCCEEDS but the extras do not
+// survive into the parsed object. That is a known asymmetry vs the spec's
+// "rejects all rows 2–14" framing, and per spec NG1 we are not permitted to
+// tighten the schema in this slot. The row-14 fixture therefore asserts the
+// CURRENT contract (extras stripped, no extra-field leakage into the parsed
+// candidate), so the test still guards against drift (a future change that
+// causes extras to leak through would fail the assertion).
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+// Deep-clone a plain JSON-shaped object. The fixtures here are all
+// JSON-safe (no Dates, no functions, no Maps), so a JSON.stringify/JSON.parse
+// round-trip is sufficient and keeps each parametric case independent.
+const clone = <T>(o: T): T => JSON.parse(JSON.stringify(o)) as T;
+
+// Factory for the base happy-path fragment (a fresh object on every call).
+// T7's first assertion is that THIS parses; every defect fixture below is ONE
+// mutator applied to a clone of it, so a rejection traces to that mutation.
+const baseHappyPath = () => ({
+  sourcetype: "memory" as const,
+  subsystem: "atlas-harvest",
+  source_name: "spec-§1.1",
+  title: "Schema-enforcement boundary catches every observed defect class",
+  content:
+    "Each defect row in §1.1 is a separate parse failure under CandidateFragmentSchema; the repair shim is no longer required.",
+  provenance: {
+    source: "atlas-leaf",
+    classification: {
+      sensitivity: "internal" as const,
+      knowledge_type: "process" as const,
+      audience: "all-staff",
+      validation_status: "unverified" as const,
+      confidence: "high" as const,
+      provenance_class: "primary" as const,
+      freshness: { as_of: "2026-06-12" },
+    },
+  },
+  evidence: [],
+  needsReview: false,
+  validationTargets: [],
+});
+
+// Serialize all zod issues into one string so per-case regex assertions can
+// match anywhere in the issue list (path OR message). Each issue renders as
+// `<dotted.path>: <message>` (an empty path renders as `(root)`) and issues
+// are joined with " | ", so a failing `expect(...).toMatch(...)` prints the
+// FULL issue list, which makes drift diagnoses obvious.
+const formatIssues = (
+  issues: ReadonlyArray<{ path: ReadonlyArray<unknown>; message: string }>,
+): string =>
+  issues
+    .map((i) => `${i.path.join(".") || "(root)"}: ${i.message}`)
+    .join(" | ");
+
+// ── T4 — 14 classification-key aliases ───────────────────────────────────────
+//
+// Each alias replaces the literal `knowledge_type` key in
+// `provenance.classification` while keeping the same valid enum VALUE
+// (`"process"`). The Zod object schema requires `knowledge_type`, so the
+// parse fails with an issue rooted at
+// `provenance.classification.knowledge_type` (required field missing). The
+// presence of the alias key itself is silently ignored (z.object strips
+// unknowns) — the rejection is driven by the required key being absent,
+// which is the right signal: it names the canonical key the model needs
+// to emit.
+
+const KNOWLEDGE_TYPE_ALIASES = [
+  "kind",
+  "category",
+  "discipline",
+  "topic",
+  "domain",
+  "area",
+  "type",
+  "facet",
+  "bucket",
+  "class",
+  "subject",
+  "theme",
+  "label",
+  "tag",
+] as const;
+
+// Build a fragment that uses `alias` instead of `knowledge_type` in
+// `provenance.classification`. Typed as `unknown` rather than
+// CandidateFragment, because by construction it does NOT satisfy the
+// inferred type.
+const fragmentWithAlias = (alias: string): unknown => {
+  const f = clone(baseHappyPath()) as Record<string, unknown> & {
+    provenance: { classification: Record<string, unknown> };
+  };
+  const c = f.provenance.classification;
+  // Copy the `knowledge_type` value onto the alias key, then drop the canonical key.
+  c[alias] = c.knowledge_type;
+  delete c.knowledge_type;
+  return f;
+};
+
+describe("T4: classification.knowledge_type key aliases (§1.1 row 1)", () => {
+  test.each(KNOWLEDGE_TYPE_ALIASES.map((alias) => [alias] as const))(
+    "rejects fragment whose classification uses alias %s instead of knowledge_type",
+    (alias) => {
+      const fixture = fragmentWithAlias(alias);
+      const result = CandidateFragmentSchema.safeParse(fixture);
+      expect(result.success).toBe(false);
+      if (result.success) return; // narrows `result` to the failure branch so `.error` is typed
+      const formatted = formatIssues(result.error.issues);
+      // The rejection must be rooted at the canonical key the model SHOULD
+      // have emitted, so the operator can read the error and fix the alias.
+      expect(formatted).toMatch(/provenance\.classification\.knowledge_type/);
+    },
+  );
+});
+
+// ── T5 — 12 other defect rows (rows 2–8, 10–14; row 9 dropped) ───────────────
+//
+// Each row is a mutator that turns the happy-path base into a single-defect
+// fixture, plus a regex the formatted-issues string must match so the
+// rejection's path/message names the offending field.
+
+interface DefectCase {
+  row: number; // §1.1 defect-row number
+  desc: string;
+  // Turn a CLONE of the happy-path base into the defect fixture and return it
+  // (mutated in place OR as a new object); the caller passes a fresh clone.
+  mutate: (f: ReturnType<typeof baseHappyPath>) => unknown;
+  // The formatted-issues string MUST match this regex. Keep the regex tight
+  // enough to name the right field/path, loose enough to survive minor
+  // zod-message wording drift across patch releases.
+  expect: RegExp;
+}
+
+const DEFECT_CASES: DefectCase[] = [
+  {
+    row: 2,
+    desc: "classification lifted to top-level (top-level `sensitivity`)",
+    mutate: (f) => {
+      const lifted: Record<string, unknown> = { ...(f as object) };
+      lifted.sensitivity = f.provenance.classification.sensitivity;
+      // Drop the nested copy so the inner Sensitivity enum field is missing.
+      delete (lifted.provenance as { classification: Record<string, unknown> })
+        .classification.sensitivity;
+      return lifted;
+    },
+    expect: /provenance\.classification\.sensitivity/,
+  },
+  {
+    row: 3,
+    desc: "evidence as string (path) instead of array",
+    mutate: (f) => {
+      (f as unknown as { evidence: unknown }).evidence = "src/foo.ts";
+      return f;
+    },
+    expect: /evidence/,
+  },
+  {
+    row: 4,
+    desc: "evidence as plain object instead of array",
+    mutate: (f) => {
+      (f as unknown as { evidence: unknown }).evidence = {
+        kind: "changed_file",
+        path: "src/foo.ts",
+      };
+      return f;
+    },
+    expect: /evidence/,
+  },
+  {
+    row: 5,
+    desc: "evidence items missing `kind` discriminator",
+    mutate: (f) => {
+      (f as unknown as { evidence: unknown }).evidence = [
+        { path: "src/foo.ts" },
+      ];
+      return f;
+    },
+    expect: /evidence\.0(\..*)?/,
+  },
+  {
+    row: 6,
+    desc: "provenance flattened — top-level `source`/`url` instead of nested",
+    mutate: (f) => {
+      const flat: Record<string, unknown> = { ...(f as object) };
+      flat.source = f.provenance.source;
+      flat.url = "https://example.invalid/issue/42";
+      delete (flat as { provenance?: unknown }).provenance;
+      return flat;
+    },
+    expect: /provenance/,
+  },
+  {
+    row: 7,
+    desc: "provenance.classification lifted to top-level",
+    mutate: (f) => {
+      const lifted: Record<string, unknown> = { ...(f as object) };
+      lifted.classification = f.provenance.classification;
+      delete (lifted.provenance as { classification?: unknown }).classification;
+      return lifted;
+    },
+    expect: /provenance\.classification/,
+  },
+  {
+    row: 8,
+    desc: "freshness as string instead of `{ as_of }` object",
+    mutate: (f) => {
+      (
+        f.provenance.classification as unknown as { freshness: unknown }
+      ).freshness = "2026-06-09";
+      return f;
+    },
+    expect: /provenance\.classification\.freshness/,
+  },
+  // Row 9 dropped — see file-header comment.
+  {
+    row: 10,
+    desc: "validationTargets as string instead of array",
+    mutate: (f) => {
+      (f as unknown as { validationTargets: unknown }).validationTargets =
+        "src/foo.ts";
+      return f;
+    },
+    expect: /validationTargets/,
+  },
+  {
+    row: 11,
+    desc: "needsReview as string instead of boolean",
+    mutate: (f) => {
+      (f as unknown as { needsReview: unknown }).needsReview = "true";
+      return f;
+    },
+    expect: /needsReview/,
+  },
+  {
+    row: 12,
+    desc: "subsystem containing canonical-key delimiter `:`",
+    mutate: (f) => {
+      f.subsystem = "foo:bar";
+      return f;
+    },
+    expect: /subsystem/,
+  },
+  {
+    row: 13,
+    desc: "missing top-level `sourcetype`",
+    mutate: (f) => {
+      delete (f as unknown as { sourcetype?: unknown }).sourcetype;
+      return f;
+    },
+    expect: /sourcetype/,
+  },
+  {
+    row: 14,
+    desc: "extra/unknown top-level fields (e.g. `summary`, `tags`) — stripped, not rejected (base schema is non-strict; see file header)",
+    // Row 14 is the one defect class where the BASE schema does not REJECT —
+    // z.object() strips unknown keys. We instead assert that the extras do
+    // not LEAK into the parsed candidate (the contract the rest of the
+    // pipeline depends on). NG1 forbids tightening the schema to `.strict()`
+    // in this slot.
+    mutate: (f) => {
+      const withExtras = { ...(f as object), summary: "drop me", tags: ["x"] };
+      return withExtras;
+    },
+    expect: /__row14_marker_unused__/, // never matched; row 14 takes the alternate assertion path below
+  },
+];
+
+describe("T5: other defect rows (§1.1 rows 2–8, 10–14)", () => {
+  test.each(DEFECT_CASES.map((c) => [c.row, c.desc, c] as const))(
+    "row %d — %s",
+    (row, _desc, c) => {
+      const fixture = c.mutate(clone(baseHappyPath()));
+      const result = CandidateFragmentSchema.safeParse(fixture);
+
+      if (row === 14) {
+        // Row 14 — assert the documented current behavior: parse SUCCEEDS,
+        // extras stripped, canonical fields all present.
+        expect(result.success).toBe(true);
+        if (!result.success) return;
+        const parsedKeys = Object.keys(result.data);
+        expect(parsedKeys).not.toContain("summary");
+        expect(parsedKeys).not.toContain("tags");
+        // The canonical fields survived the parse.
+        expect(parsedKeys).toEqual(
+          expect.arrayContaining([
+            "sourcetype",
+            "subsystem",
+            "source_name",
+            "title",
+            "content",
+            "provenance",
+            "evidence",
+            "needsReview",
+            "validationTargets",
+          ]),
+        );
+        return;
+      }
+
+      expect(result.success).toBe(false);
+      if (result.success) return; // type guard
+      const formatted = formatIssues(result.error.issues);
+      expect(formatted).toMatch(c.expect);
+    },
+  );
+});
+
+// ── T7 — Integration: happy-path passes; every defect swap is handled ───────
+//
+// T7 ties T4 + T5 together: ONE base, the SAME per-defect mutators, parse
+// runs end-to-end. The intent is to prove the happy path is wired correctly
+// AND that no defect leaks through under the same surface the production
+// helper uses (row 14 parses, but its extras must be stripped). If a future
+// schema tweak accidentally re-admits a defect, this block fails at the
+// integration layer in addition to the focused T4/T5 case.
+
+describe("T7: integration — happy path + every defect swap", () => {
+  test("happy-path fragment parses successfully", () => {
+    const result = CandidateFragmentSchema.safeParse(baseHappyPath());
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    // Sanity: the parsed candidate's enum-typed fields survived.
+    expect(result.data.sourcetype).toBe("memory");
+    expect(result.data.provenance.classification.knowledge_type).toBe(
+      "process",
+    );
+  });
+
+  test.each(KNOWLEDGE_TYPE_ALIASES.map((a) => [a] as const))(
+    "alias swap (%s) rejected at integration layer",
+    (alias) => {
+      const result = CandidateFragmentSchema.safeParse(
+        fragmentWithAlias(alias),
+      );
+      expect(result.success).toBe(false);
+    },
+  );
+
+  test.each(DEFECT_CASES.map((c) => [c.row, c.desc, c] as const))(
+    "defect-row swap (row %d — %s) handled at integration layer",
+    (row, _desc, c) => {
+      const fixture = c.mutate(clone(baseHappyPath()));
+      const result = CandidateFragmentSchema.safeParse(fixture);
+      // Mirror T5: row 14 PASSES with extras stripped; all others REJECT.
+      if (row === 14) {
+        expect(result.success).toBe(true);
+        if (!result.success) return;
+        expect(Object.keys(result.data)).not.toContain("summary");
+        return;
+      }
+      expect(result.success).toBe(false);
+    },
+  );
+});
diff --git a/src/__tests__/atlas-dual-run.test.ts b/src/__tests__/atlas-dual-run.test.ts
new file mode 100644
index 0000000..f191517
--- /dev/null
+++ b/src/__tests__/atlas-dual-run.test.ts
@@ -0,0 +1,424 @@
+import { describe, it, expect } from "vitest";
+import { runDualRun } from "../atlas/dual-run.js";
+
+// T10 — dual-run shadow gate scaffold (spec §6.2, §7.6).
+//
+// Exercises the three precondition branches of `runDualRun`:
+//   (a) seed-present + match  ........... byte-equality after canonicalize
+//   (a) seed-present + diverge  ......... canonicalize differs → diagnose field
+//   (b) no-seed + relaxed-match  ........ same shape + enums + text ≥ 0.95
+//   (b) no-seed + diverge  .............. enum field differs
+//   (c) neither-available + gated  ...... refuse to advance
+
+describe("runDualRun — §7.6 T10 shadow-gate scaffold", () => {
+  it("seed-present: identical runs canonicalize equal → match", () => {
+    // Key-permuted but value-identical fragments. canonicalizeFragment sorts
+    // keys + normalizes whitespace, so the stringified canonical output is
+    // byte-equal — strict-comparator match.
+    const runA = {
+      title: "Atlas schema enforcement",
+      content: "First line.\nSecond line.",
+      sensitivity: "internal",
+      confidence: "high",
+    };
+    const runB = {
+      // Same fields, different insertion order, equivalent whitespace.
+      content: "First line. Second line.",
+      sensitivity: "internal",
+      title: "Atlas schema enforcement",
+      confidence: "high",
+    };
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: true,
+      relaxedComparatorAvailable: false,
+    });
+
+    expect(verdict.result).toBe("match");
+    expect(verdict.reason).toMatch(/seed-present/);
+  });
+
+  it("seed-present: runs differing in title → diverge, reason names title", () => {
+    const runA = {
+      title: "Atlas schema enforcement",
+      content: "Same content body.",
+      sensitivity: "internal",
+      confidence: "high",
+    };
+    const runB = {
+      title: "Atlas schema enforcement — revised",
+      content: "Same content body.",
+      sensitivity: "internal",
+      confidence: "high",
+    };
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: true,
+      relaxedComparatorAvailable: false,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("title");
+  });
+
+  // M-1 + M-5 regression coverage — the no-seed-relaxed branch must read the
+  // five classification enums at their REAL nested path
+  // (provenance.classification.<field>) AND must also enforce sourcetype
+  // (top-level) and per-item evidence[].kind. The "both-missing" case for any
+  // covered enum is a structural divergence (per spec §7.6 the relaxed
+  // comparator's purpose is to catch enum drift; the classification record
+  // itself is a structural invariant).
+
+  // Minimum-valid CandidateFragment shape — matches CandidateFragmentObject
+  // in src/atlas/types.ts (validated by CandidateFragmentSchema). Helper keeps
+  // the relaxed-branch tests below readable; tests that need to perturb a
+  // single field call baseFragment() on both sides and mutate one side.
+  const baseFragment = () => ({
+    sourcetype: "github-pr" as const,
+    subsystem: "atlas",
+    source_name: "test-source",
+    title: "Atlas pipeline overview",
+    content: "Pipeline that canonicalizes fragments under provenance.",
+    provenance: {
+      source: "test",
+      classification: {
+        sensitivity: "internal" as const,
+        knowledge_type: "architecture" as const,
+        audience: "all-staff",
+        validation_status: "showcase-verified" as const,
+        confidence: "high" as const,
+        provenance_class: "primary" as const,
+        freshness: { as_of: "2026-06-12" },
+      },
+    },
+    evidence: [
+      { kind: "changed_file" as const, path: "src/atlas/dual-run.ts" },
+    ],
+    needsReview: false,
+    validationTargets: [],
+  });
+
+  it("no-seed: same enums + highly-similar text → relaxed-match", () => {
+    // Same shape (identical key set), nested enums byte-equal, and IDENTICAL
+    // title/content on both sides — the 0.95 similarity boundary is not probed.
+    const sharedTokens =
+      "the atlas pipeline canonicalizes fragments and ranks them by confidence and recency under provenance class primary";
+    const runA = baseFragment();
+    runA.content = sharedTokens + " plus an extra clarifying token here";
+    const runB = baseFragment();
+    runB.content = sharedTokens + " plus an extra clarifying token here";
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("relaxed-match");
+  });
+
+  it("no-seed: enum field differs (sensitivity) → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    runB.provenance.classification.sensitivity =
+      "public" as typeof runB.provenance.classification.sensitivity;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("sensitivity");
+  });
+
+  it("M-1 nesting: mismatched provenance.classification.knowledge_type → diverge", () => {
+    // Independent corroboration of the nested-path read for a SECOND enum
+    // (not sensitivity, exercised above). Catches a future regression that
+    // hardcodes the path for sensitivity but mis-handles the other four.
+    const runA = baseFragment();
+    const runB = baseFragment();
+    runB.provenance.classification.knowledge_type =
+      "ownership" as typeof runB.provenance.classification.knowledge_type;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("knowledge_type");
+  });
+
+  it("M-1 sourcetype: top-level sourcetype mismatch → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    runB.sourcetype = "notion-doc" as typeof runB.sourcetype;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("sourcetype");
+  });
+
+  it("T-R5-1 both-missing sourcetype: neither side has sourcetype → diverge", () => {
+    // CandidateFragmentObject declares `sourcetype` as a REQUIRED enum (no
+    // .optional(), no .default()). Per the M-5/T-R3-1/T-R3-2 precedent, the
+    // relaxed comparator must NOT silently pass when both sides are missing a
+    // required structural field. JSON.stringify(undefined) === undefined on
+    // both sides would compare-equal and silent-pass without an explicit
+    // both-missing guard.
+    const runA = baseFragment();
+    const runB = baseFragment();
+    delete (runA as any).sourcetype;
+    delete (runB as any).sourcetype;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("sourcetype");
+  });
+
+  it("M-1 evidence[].kind: per-item evidence kind mismatch → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    // Swap the single evidence item's discriminant on runB.
+    runB.evidence = [{ kind: "linked_issue" as const, path: "x" } as any];
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toMatch(/evidence/);
+  });
+
+  it("M-5 both-missing: neither side has provenance.classification.sensitivity → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    // Drop sensitivity on both sides. The classification record is a
+    // structural invariant per spec §7.6; both-missing must NOT silently pass.
+    delete (runA.provenance.classification as any).sensitivity;
+    delete (runB.provenance.classification as any).sensitivity;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("sensitivity");
+  });
+
+  // T-R3-1: extend M-5 "both-missing → diverge" precedent to the required
+  // structural text fields (title, content). CandidateFragmentObject declares
+  // title: z.string() and content: z.string() — both REQUIRED, no .default(),
+  // no .optional(). Two fragments both lacking `title` (or both lacking
+  // `content`) are both malformed; the relaxed comparator must NOT collapse
+  // them to an empty-string Jaccard 1.0 silent pass.
+
+  it("T-R3-1 both-missing title: neither side has title → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    delete (runA as any).title;
+    delete (runB as any).title;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("title");
+  });
+
+  it("T-R3-1 both-non-string title: neither side has string title → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    (runA as any).title = 42;
+    (runB as any).title = null;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("title");
+  });
+
+  it("T-R3-1 both-missing content: neither side has content → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    delete (runA as any).content;
+    delete (runB as any).content;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("content");
+  });
+
+  // T-R3-2: extend M-5 "both-missing → diverge" precedent to the evidence
+  // array. The schema declares `evidence: z.array(...).default([])` so AFTER
+  // parse evidence is always an array — but the comparator receives untyped
+  // `object` from upstream and is the structural pin against malformation
+  // bypassing the parser. Both sides missing the evidence field entirely
+  // means both fragments are malformed in the same way → diverge, not silent
+  // empty-array match.
+
+  it("T-R3-2 both-missing evidence: neither side has evidence → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    delete (runA as any).evidence;
+    delete (runB as any).evidence;
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toMatch(/evidence/);
+  });
+
+  // T-R4-1: extend the M-5 / T-R3-2 "both-missing → diverge" precedent to the
+  // ASYMMETRIC mixed-shape variant. When ONE side's `evidence` is a non-array
+  // value (undefined / string / scalar / object) and the OTHER side is a
+  // well-formed array (including the empty array `[]`), the prior `?? []`
+  // fallback collapsed both sides to length-0 and the per-index loop trivially
+  // matched — a silent relaxed-match on a structurally divergent pair. Per
+  // spec §7.6, structurally different evidence shapes must diverge.
+
+  it("T-R4-1 asymmetric evidence: undefined vs [] → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    // Keep the `evidence` key present on both sides (so the top-level key-set
+    // check does not fire first); set runA to a non-array value to exercise
+    // the XOR shape-mismatch branch directly.
+    (runA as any).evidence = undefined; // non-array (undefined)
+    (runB as any).evidence = []; // valid empty array
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toMatch(/evidence/);
+  });
+
+  it("T-R4-1 asymmetric evidence: non-array string vs [] → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    (runA as any).evidence = "not-an-array";
+    (runB as any).evidence = [];
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toMatch(/evidence/);
+  });
+
+  // T-R4-2: extend the T-R3-1 "both-missing/non-string → diverge" precedent
+  // to the ASYMMETRIC mixed-shape variant for text fields. When ONE side has
+  // a valid empty string `""` and the OTHER side is non-string (undefined /
+  // number / null / object), the prior `?? ""` fallback collapsed both to
+  // `""`, Jaccard("", "") = 1.0, and the gate silently relaxed-matched on a
+  // structurally divergent pair. Per spec §7.6 + the schema's required
+  // `z.string()` declaration, shape-mismatch must diverge.
+
+  it("T-R4-2 asymmetric title: empty string vs undefined → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    // Keep the `title` key present on both sides (so the top-level key-set
+    // check does not fire first); the non-string side exercises the text-XOR
+    // shape-mismatch branch directly.
+    (runA as any).title = ""; // valid empty string
+    (runB as any).title = undefined; // non-string (undefined value)
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("title");
+  });
+
+  it("T-R4-2 asymmetric content: empty string vs non-string → diverge", () => {
+    const runA = baseFragment();
+    const runB = baseFragment();
+    (runA as any).content = ""; // valid empty string
+    (runB as any).content = 42; // non-string number
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: true,
+    });
+
+    expect(verdict.result).toBe("diverge");
+    expect(verdict.reason).toContain("content");
+  });
+
+  it("neither seed nor relaxed comparator available → gated", () => {
+    const runA = { title: "x" };
+    const runB = { title: "x" };
+
+    const verdict = runDualRun({
+      runA,
+      runB,
+      seedAvailable: false,
+      relaxedComparatorAvailable: false,
+    });
+
+    expect(verdict.result).toBe("gated");
+    expect(verdict.reason).toMatch(/neither/i);
+  });
+});
diff --git a/src/__tests__/atlas-refinement-coverage.test.ts b/src/__tests__/atlas-refinement-coverage.test.ts
new file mode 100644
index 0000000..e84792b
--- /dev/null
+++ b/src/__tests__/atlas-refinement-coverage.test.ts
@@ -0,0 +1,138 @@
+import { describe, it, expect } from "vitest";
+import { readFileSync } from "node:fs";
+import { resolve } from "node:path";
+
+// ── T9 — Refinement-coverage stale-doc guard (spec §4.1.1 / §7.9) ─────────────
+//
+// `zod-to-json-schema` silently drops every `.refine(...)`, `.superRefine(...)`,
+// `.transform(...)`, and `.regex(...)` it cannot translate. The orchestration
+// shell hands the JSON-Schema'd document to the structured-output call, so
+// every Zod runtime predicate that does NOT round-trip into JSON Schema MUST
+// be wired into a post-pass Zod parse in the `atlas harvest write-fragment
+// --stdin` CLI helper (spec §4.2.1 STEP 2 + §4.6). Otherwise the predicate is
+// "silently lost" and malformed fragments land on disk.
+//
+// This test future-guards against the silent-drop class of bug: it walks
+// `src/atlas/types.ts`, counts every `.refine(`, `.superRefine(`, and
+// `.transform(` token IN CODE (comments stripped), then counts the rows in
+// the refinement-coverage table in `docs/atlas/refinement-coverage.md`, and
+// asserts the two counts agree. If a contributor adds a new refinement to
+// `types.ts` without adding a corresponding doc row (and therefore without
+// thinking about where to wire it into the post-pass), the test fails with
+// a stale-doc message that names the drift.
+
+const REPO_ROOT = resolve(__dirname, "..", "..");
+const TYPES_PATH = resolve(REPO_ROOT, "src", "atlas", "types.ts");
+const DOC_PATH = resolve(REPO_ROOT, "docs", "atlas", "refinement-coverage.md");
+
+// Strip `/* ... */` block comments and `// ...` line comments from a TS source
+// string in ONE left-to-right pass. String and template literals are matched
+// first and kept verbatim, so a `//` or `/*` inside a literal — or a `/*`
+// inside a `//` comment — can no longer open a comment and swallow real code.
+// Still not a full TypeScript tokenizer (regex literals and `${}` nesting are
+// not modeled), but sufficient to keep the refinement counter from picking up
+// the future-edit note in `types.ts` that mentions `.refine(...)` in a comment.
+function stripComments(src: string): string {
+  // Capture group 1 is a literal to keep; when it is undefined a comment matched.
+  const token =
+    /("(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])*'|`(?:\\.|[^`\\])*`)|\/\*[\s\S]*?\*\/|\/\/[^\n]*/g;
+  return src.replace(token, (_all, literal?: string) => literal ?? "");
+}
+
+// Count non-overlapping occurrences of a regex needle in `body`.
+function countOccurrences(body: string, needle: RegExp): number {
+  // A /g `match` returns all hits, or null (→ 0) when nothing matched.
+  return body.match(needle)?.length ?? 0;
+}
+
+// Count rows in the FIRST GitHub-flavored markdown table in `doc` that has the
+// expected refinement-coverage header (`| Refinement | Schema |`). The row
+// count EXCLUDES the header row and the `|---|---|...|` separator row.
+function countTableRows(doc: string): number {
+  // Header row we target (case-insensitive):
+  // `| Refinement | Schema | JSON-Schema-expressible? | Post-pass note |`
+  const HEADER = /^\|\s*Refinement\s*\|\s*Schema\s*\|/i;
+  // Separator row: `|---|---|`, also accepting GFM alignment colons such as
+  // `|:---|---:|:---:|`. Without the `:?` a colon-aligned table would never
+  // be recognized and the function would report 0 rows.
+  const SEPARATOR = /^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$/;
+  let inTable = false;
+  let sawSeparator = false;
+  let rowCount = 0;
+  for (const line of doc.split("\n")) {
+    const trimmed = line.trim();
+    if (!inTable) {
+      // Still scanning for the header; everything before it is ignored.
+      if (HEADER.test(trimmed)) {
+        inTable = true;
+      }
+      continue;
+    }
+    if (!sawSeparator) {
+      // Lines between the header and the separator are skipped, not counted.
+      if (SEPARATOR.test(trimmed)) {
+        sawSeparator = true;
+      }
+      continue;
+    }
+    // Past the separator: a line starting with `|` is a data row; a blank
+    // line or any other content ends the table.
+    if (!trimmed.startsWith("|")) {
+      break;
+    }
+    rowCount += 1;
+  }
+  return rowCount;
+}
+
+describe("atlas refinement coverage (T9 — stale-doc guard)", () => {
+  it("doc table row count matches source refinement count", () => {
+    const typesSrc = readFileSync(TYPES_PATH, "utf8");
+    const docSrc = readFileSync(DOC_PATH, "utf8");
+
+    const codeOnly = stripComments(typesSrc);
+    const refineCount = countOccurrences(codeOnly, /\.refine\(/g);
+    const superRefineCount = countOccurrences(codeOnly, /\.superRefine\(/g);
+    const transformCount = countOccurrences(codeOnly, /\.transform\(/g);
+    const sourceCount = refineCount + superRefineCount + transformCount;
+
+    const tableRows = countTableRows(docSrc);
+
+    expect(
+      tableRows,
+      `refinement-coverage.md is stale — ${sourceCount} source refinements ` +
+        `(refine=${refineCount}, superRefine=${superRefineCount}, transform=${transformCount}) ` +
+        `vs ${tableRows} table rows. Update docs/atlas/refinement-coverage.md.`,
+    ).toBe(sourceCount);
+  });
+
+  it("doc Summary block reports a Total count that matches source", () => {
+    // A second, weaker assertion: the human-readable Summary block in the
+    // doc lists the total refinement count. If a contributor updates the
+    // table but forgets to update the summary numerals, that's also drift.
+    const typesSrc = readFileSync(TYPES_PATH, "utf8");
+    const docSrc = readFileSync(DOC_PATH, "utf8");
+
+    const codeOnly = stripComments(typesSrc);
+    const sourceCount =
+      countOccurrences(codeOnly, /\.refine\(/g) +
+      countOccurrences(codeOnly, /\.superRefine\(/g) +
+      countOccurrences(codeOnly, /\.transform\(/g);
+
+    // Match `Total refinements / transforms in \`src/atlas/types.ts\`: **N**`
+    const totalMatch = docSrc.match(
+      /Total refinements[^\n]*\*\*\s*(\d+)\s*\*\*/i,
+    );
+    expect(
+      totalMatch,
+      "refinement-coverage.md is missing a `Total refinements ... **N**` summary line.",
+    ).not.toBeNull();
+    const docTotal = Number(totalMatch![1]);
+    expect(
+      docTotal,
+      `refinement-coverage.md summary is stale — source has ${sourceCount} ` +
+        `refinements, summary says ${docTotal}. ` +
+        `Update docs/atlas/refinement-coverage.md.`,
+    ).toBe(sourceCount);
+  });
+});
diff --git a/src/atlas/dual-run.ts b/src/atlas/dual-run.ts
new file mode 100644
index 0000000..72fa0cc
--- /dev/null
+++ b/src/atlas/dual-run.ts
@@ -0,0 +1,315 @@
+// Phase-2 dual-run shadow gate (spec §6.2, §7.6 / T10).
+//
+// runDualRun compares two structured-output draws (runA, runB) for the SAME
+// fragment-shaped target and produces a verdict the harness uses to decide
+// whether Phase 2 may advance. The comparator has three precondition branches:
+//
+//   (a) seed-present (deterministic control): canonicalize both runs via
+//       `canonicalizeFragment` and require byte-equality of the resulting
+//       JSON.stringify. If they match → "match". Else → "diverge", with a
+//       reason naming the FIRST diverging top-level field.
+//
+//   (b) no-seed but a relaxed comparator is available: compare structurally
+//       — same top-level shape (same key set), enum fields byte-identical
+//       at their real paths (the five classification enums nested under
+//       `provenance.classification.*` — sensitivity, knowledge_type,
+//       validation_status, confidence, provenance_class — plus top-level
+//       `sourcetype` and per-item `evidence[].kind`), and free-text fields
+//       (title, content) with similarity ≥ 0.95. Pass → "relaxed-match";
+//       else → "diverge".
+//       Similarity uses a simple word-set Jaccard (|A∩B| / |A∪B|) — chosen
+//       over character-bigram cosine to avoid pulling in an extra dependency;
+//       Jaccard is robust enough for the gate threshold and trivially
+//       reproducible. See SIMILARITY_THRESHOLD below.
+//
+//   (c) neither available: "gated" — Phase 2 cannot advance.
+//
+// Verdicts are diagnostic, not destructive — the gate refuses to advance
+// rather than dropping data.
+
+import { canonicalizeFragment } from "./canonicalize.js";
+
+// The FIVE classification enum fields that must be byte-identical in the
+// no-seed relaxed branch. CandidateFragmentObject (src/atlas/types.ts) nests
+// them under `provenance.classification.<field>` — NOT top-level. Reading them
+// at the wrong level silently passes every check on a real fragment (see M-1).
+const CLASSIFICATION_ENUM_FIELDS = [
+  "sensitivity",
+  "knowledge_type",
+  "validation_status",
+  "confidence",
+  "provenance_class",
+] as const;
+
+// Free-text fields compared by similarity in the relaxed branch.
+const TEXT_FIELDS = ["title", "content"] as const;
+
+// Minimum word-set Jaccard score a text field needs in the relaxed branch —
+// the same numeric threshold the spec states under its cosine framing.
+const SIMILARITY_THRESHOLD = 0.95;
+
+/** The four outcomes a dual-run comparison can report. */
+export type DualRunResult = "match" | "relaxed-match" | "diverge" | "gated";
+
+export interface DualRunVerdict {
+  result: DualRunResult;
+  reason: string;
+}
+
+/** Inputs to runDualRun: the two draws plus which comparison modes are usable. */
+export interface DualRunOptions {
+  runA: object;
+  runB: object;
+  seedAvailable: boolean;
+  relaxedComparatorAvailable: boolean;
+}
+
+// Word-set Jaccard similarity |A∩B| / |A∪B|, always in [0, 1]. Tokens are
+// the lowercased, whitespace-delimited words of each input. A pair with no
+// tokens on either side scores 1: there is nothing for the two to disagree on.
+function jaccardSimilarity(a: string, b: string): number {
+  const toWordSet = (text: string): Set<string> => {
+    const words = text.toLowerCase().split(/\s+/);
+    return new Set(words.filter((word) => word.length > 0));
+  };
+  const left = toWordSet(a);
+  const right = toWordSet(b);
+  // Both sides tokenless (empty or whitespace-only input): identical.
+  if (left.size === 0 && right.size === 0) return 1;
+  let shared = 0;
+  left.forEach((word) => {
+    if (right.has(word)) shared += 1;
+  });
+  // Inclusion–exclusion; non-zero here because at least one set has a member.
+  const unionSize = left.size + right.size - shared;
+  return shared / unionSize;
+}
+
+// Report the first top-level key, in sorted order, at which the two records
+// serialize differently. Yields null when every key in the union of both key
+// sets has identical JSON.stringify output on each side.
+function firstDivergingField(
+  a: Record<string, unknown>,
+  b: Record<string, unknown>,
+): string | null {
+  // Take the union so a key present on only one side is examined, and sort
+  // it so the reported field does not depend on key-insertion order.
+  const candidates = Array.from(
+    new Set<string>(Object.keys(a).concat(Object.keys(b))),
+  ).sort();
+  const differs = (key: string): boolean =>
+    JSON.stringify(a[key]) !== JSON.stringify(b[key]);
+  return candidates.find(differs) ?? null;
+}
+
+/**
+ * Compare two structured-output draws of the same fragment-shaped target and
+ * produce the shadow-gate verdict.
+ *
+ * Branches are tried in order: seed-present (strict byte-equality after
+ * `canonicalizeFragment`), then no-seed relaxed (shape + enum equality, text
+ * similarity), else "gated". The relaxed branch reports a missing or
+ * mis-shaped required field as "diverge" instead of passing it silently.
+ *
+ * @param opts - The two runs plus the flags that select the comparison branch.
+ * @returns The verdict, with a reason naming the check that decided it.
+ */
+export function runDualRun(opts: DualRunOptions): DualRunVerdict {
+  const { runA, runB, seedAvailable, relaxedComparatorAvailable } = opts;
+
+  // Branch (a): seed-present — strict byte-equality after canonicalize.
+  if (seedAvailable) {
+    const canonA = canonicalizeFragment(runA) as Record<string, unknown>;
+    const canonB = canonicalizeFragment(runB) as Record<string, unknown>;
+    if (JSON.stringify(canonA) === JSON.stringify(canonB)) {
+      return {
+        result: "match",
+        reason: "seed-present byte-equality after canonicalize",
+      };
+    }
+    const field = firstDivergingField(canonA, canonB);
+    return {
+      result: "diverge",
+      reason: field
+        ? `seed-present canonicalized runs diverge at field "${field}"`
+        : "seed-present canonicalized runs diverge",
+    };
+  }
+
+  // Branch (b): no-seed but relaxed comparator available.
+  if (relaxedComparatorAvailable) {
+    const a = runA as Record<string, unknown>;
+    const b = runB as Record<string, unknown>;
+
+    // Shape compat: same top-level key set. Schema validation happened
+    // upstream, so we only need to confirm the two runs are comparing the
+    // same field surface.
+    const keysA = new Set(Object.keys(a));
+    const keysB = new Set(Object.keys(b));
+    if (keysA.size !== keysB.size) {
+      return {
+        result: "diverge",
+        reason: `no-seed relaxed: top-level key sets differ in size (${keysA.size} vs ${keysB.size})`,
+      };
+    }
+    for (const k of keysA) {
+      if (!keysB.has(k)) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: key "${k}" missing on runB`,
+        };
+      }
+    }
+
+    // Classification enums: byte-identical, read at the REAL nested path
+    // `provenance.classification.<field>`. An absent or non-object
+    // classification is treated as empty — `in` throws a TypeError on a
+    // primitive, and a malformed shape must diverge rather than crash the gate.
+    const classificationOf = (
+      run: Record<string, unknown>,
+    ): Record<string, unknown> => {
+      const nested = (run.provenance as Record<string, unknown> | undefined)
+        ?.classification;
+      return typeof nested === "object" && nested !== null
+        ? (nested as Record<string, unknown>)
+        : {};
+    };
+    const classA = classificationOf(a);
+    const classB = classificationOf(b);
+    for (const field of CLASSIFICATION_ENUM_FIELDS) {
+      const hasA = field in classA;
+      const hasB = field in classB;
+      // M-5: the schema requires every classification enum, so both-missing
+      // means a malformed pair — a structural divergence, never a pass.
+      if (!hasA && !hasB) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: classification enum "${field}" missing on both sides`,
+        };
+      }
+      if (JSON.stringify(classA[field]) !== JSON.stringify(classB[field])) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: classification enum "${field}" differs`,
+        };
+      }
+    }
+
+    // Top-level `sourcetype` enum (spec §7.6): REQUIRED on
+    // CandidateFragmentObject (no .optional(), no .default()). Guard
+    // both-missing explicitly — otherwise the stringify compare below sees
+    // `undefined === undefined` and silently passes (M-5 / T-R3-1 / T-R3-2).
+    const hasSourcetypeA = "sourcetype" in a;
+    const hasSourcetypeB = "sourcetype" in b;
+    if (!hasSourcetypeA && !hasSourcetypeB) {
+      return {
+        result: "diverge",
+        reason: `no-seed relaxed: enum field "sourcetype" missing on both sides`,
+      };
+    }
+    if (JSON.stringify(a.sourcetype) !== JSON.stringify(b.sourcetype)) {
+      return {
+        result: "diverge",
+        reason: `no-seed relaxed: enum field "sourcetype" differs`,
+      };
+    }
+
+    // Per-item `evidence[].kind` enum: both sides need the same item count and
+    // the same `kind` discriminant at each index (positional alignment matches
+    // the per-index canonicalize ordering).
+    //
+    // T-R3-2 / T-R4-1: the schema defaults `evidence` to `[]`, so after parse
+    // it is always an array — but this comparator receives untyped objects
+    // and must pin the shape itself. One-sided non-array (XOR) and two-sided
+    // non-array both diverge; neither may collapse to an empty array.
+    const evAArray = Array.isArray(a.evidence);
+    const evBArray = Array.isArray(b.evidence);
+    if (evAArray !== evBArray) {
+      return {
+        result: "diverge",
+        reason:
+          "no-seed relaxed: evidence shape mismatch (one side is not an array)",
+      };
+    }
+    if (!evAArray && !evBArray) {
+      return {
+        result: "diverge",
+        reason: "no-seed relaxed: evidence array missing on both sides",
+      };
+    }
+    // Both arrays at this point — proceed with length + per-index check.
+    const evA = a.evidence as unknown[];
+    const evB = b.evidence as unknown[];
+    if (evA.length !== evB.length) {
+      return {
+        result: "diverge",
+        reason: `no-seed relaxed: evidence array length differs (${evA.length} vs ${evB.length})`,
+      };
+    }
+    for (let i = 0; i < evA.length; i += 1) {
+      const kA = (evA[i] as Record<string, unknown> | undefined)?.kind;
+      const kB = (evB[i] as Record<string, unknown> | undefined)?.kind;
+      // `kind` is the required discriminant: absent on both sides would
+      // compare `undefined` to `undefined` below and silently pass (M-5).
+      if (kA === undefined && kB === undefined) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: evidence[${i}].kind missing on both sides`,
+        };
+      }
+      if (JSON.stringify(kA) !== JSON.stringify(kB)) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: evidence[${i}].kind differs`,
+        };
+      }
+    }
+
+    // Free-text fields: Jaccard similarity ≥ threshold.
+    //
+    // T-R3-1 / T-R4-2: `title` and `content` are REQUIRED `z.string()` fields
+    // (no `.default()`, no `.optional()`). An empty-string fallback for a
+    // non-string value would let Jaccard("", "") = 1.0 match a malformed
+    // pair, so shape is checked first: a one-sided non-string (XOR) and a
+    // two-sided non-string both diverge before similarity is computed.
+    for (const field of TEXT_FIELDS) {
+      const aIsString = typeof a[field] === "string";
+      const bIsString = typeof b[field] === "string";
+      if (aIsString !== bIsString) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: text field "${field}" shape mismatch (one side is not a string)`,
+        };
+      }
+      if (!aIsString && !bIsString) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: text field "${field}" missing on both sides`,
+        };
+      }
+      // Both strings at this point — proceed with Jaccard similarity.
+      const ta = a[field] as string;
+      const tb = b[field] as string;
+      const sim = jaccardSimilarity(ta, tb);
+      if (sim < SIMILARITY_THRESHOLD) {
+        return {
+          result: "diverge",
+          reason: `no-seed relaxed: text field "${field}" similarity ${sim.toFixed(3)} < ${SIMILARITY_THRESHOLD}`,
+        };
+      }
+    }
+
+    return {
+      result: "relaxed-match",
+      reason:
+        "no-seed relaxed: shape + enums equal, text similarity above threshold",
+    };
+  }
+
+  // Branch (c): neither precondition met — gate refuses to advance.
+  return {
+    result: "gated",
+    reason:
+      "neither seed control nor relaxed comparator is available; Phase 2 cannot advance",
+  };
+}

From 9c5bee90b8d64b85b57c32dd938b21208554beda Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Fri, 12 Jun 2026 11:36:52 -0700
Subject: [PATCH 6/6] Document Phase-0 schema-enforced write path and fragment
 on-disk contract

---
 runs/fragments/README.md                |  64 ++++++++
 scripts/atlas-harvest/blitz-manifest.md |   2 +
 scripts/atlas-harvest/leaf-prompt.md    | 195 +++++++++++++++++++-----
 3 files changed, 226 insertions(+), 35 deletions(-)
 create mode 100644 runs/fragments/README.md

diff --git a/runs/fragments/README.md b/runs/fragments/README.md
new file mode 100644
index 0000000..39746e2
--- /dev/null
+++ b/runs/fragments/README.md
@@ -0,0 +1,64 @@
+# Atlas Harvest — fragment on-disk contract
+
+Fragments under `<runs-dir>/<run-id>/fragments/<stem>.json` are the canonical
+durable artifact of a Tier-1 leaf-fleet run. They are the seam between the
+agent-orchestration half (the leaf fleet) and the deterministic in-process
+half (`atlas harvest run` and downstream tiers).
+
+## On-disk format
+
+One JSON object per file, pretty-printed, validated against
+`CandidateFragmentSchema` (in `src/atlas/types.ts`) — or
+`EpisodicCandidateFragmentSchema` when `sourcetype: "episodic"`, which layers
+the four episodic-invariant refinements (`needsReview`, `provenance_class`,
+`confidence`, `validation_status`) on top of the base.
+
+See `scripts/atlas-harvest/leaf-prompt.md` for the field-by-field contract and
+worked examples.
+
+## Stem derivation
+
+The file stem is supplied explicitly via `--stem <stem>` to the
+`atlas harvest write-fragment` CLI. When `--stem` is omitted, the stem is
+derived from the fragment's canonical-key components — concretely
+`claimSlug(<sourcetype>:<subsystem>:claimSlug(claimSlugHint || title))`
+(`claimSlugHint` is optional on `CandidateFragmentSchema`; the CLI falls back
+to the fragment `title` when no hint is supplied). The stem derivation and
+the fragment's `canonical_key` are produced by different functions and yield
+different strings — the stem is a filesystem-safe slug, not a copy of the
+canonical key. The derivation is still idempotent across runs and two
+fragments with the same claim text but different sourcetype/subsystem never
+collide.
+
+## Canonical write boundary
+
+`atlas harvest write-fragment --stdin` is the only supported writer into this
+directory in Phase 0. Direct `fs.writeFile` from leaves is deprecated as of
+Phase 0 — it still works (existing leaves are not broken) but it is no longer
+a supported write path, and Phase 1 will remove the leaf-side writer entirely.
+
+The write CLI reads a single fragment JSON from stdin, validates it, and
+writes it to `<runs-dir>/<run-id>/fragments/<stem>.json`.
+
+## Schema validation
+
+The CLI Zod-parses the input before writing. Exit-code matrix (spec §4.2.1):
+
+- `0` — success (fragment written; absolute path printed to stdout)
+- `1` — stdin/IO failure (bad JSON, unreadable stdin, write error other than EEXIST)
+- `2` — stem collision (file already exists)
+- `3` — schema validation failure (base `CandidateFragmentSchema` rejected the input)
+- `4` — episodic invariant violation (one of `needsReview`/`provenance_class`/`confidence`/`validation_status` failed the episodic refinement)
+
+stderr always carries the underlying Zod / IO error message; the exit code
+distinguishes the FAILURE CLASS so the caller (leaf adapter, CI gate) can
+route accordingly.
+
+## Atomic create
+
+The CLI creates fragment files EXCLUSIVELY (the underlying open uses the `wx`
+flag). A pre-existing file at the same stem yields exit code 2 (`EEXIST`) and
+no write occurs — the prior fragment is never silently overwritten.
+
+To re-mint a fragment at the same stem, delete the file first (or run with a
+fresh `--run-id`).
diff --git a/scripts/atlas-harvest/blitz-manifest.md b/scripts/atlas-harvest/blitz-manifest.md
index cae657f..16cc71c 100644
--- a/scripts/atlas-harvest/blitz-manifest.md
+++ b/scripts/atlas-harvest/blitz-manifest.md
@@ -34,6 +34,8 @@ runs AFTER the fleet, over the fragments this fleet produces.
 | `FRAGMENTS_DIR` | Absolute path to `runs/<RUN_ID>/fragments/`. The single write target.                                                    |
 | `AS_OF`         | The harvest "as of" calendar date (`YYYY-MM-DD`) stamped into provenance freshness for sources that lack their own date. |
 
+- Phase-0 canonical write path: pipe fragments through `atlas harvest write-fragment --stdin`. See `runs/fragments/README.md` for the on-disk contract.
+
 ## Fragment id convention
 
 Each leaf owns a unique, filesystem-safe, deterministic file stem so parallel
diff --git a/scripts/atlas-harvest/leaf-prompt.md b/scripts/atlas-harvest/leaf-prompt.md
index 25cc2d5..340a522 100644
--- a/scripts/atlas-harvest/leaf-prompt.md
+++ b/scripts/atlas-harvest/leaf-prompt.md
@@ -88,33 +88,40 @@ Every fragment file is ONE object of this shape:
 ```jsonc
 {
   "sourcetype": "memory | episodic | github-pr | github-issue | notion-doc | linear-doc | agent-doc | derived",
-  "subsystem": "<subsystem/saga slug>",          // required — must NOT contain ':' (canonical-key delimiter) or '⟦'/'⟧' (approval-marker delimiters); the schema hard-rejects all three
-  "claimSlugHint": "<optional claim slug>",        // optional
-  "source_name": "<logical source name>",          // required
+  "subsystem": "<subsystem/saga slug>", // required — must NOT contain ':' (canonical-key delimiter) or '⟦'/'⟧' (approval-marker delimiters); the schema hard-rejects all three
+  "claimSlugHint": "<optional claim slug>", // optional
+  "source_name": "<logical source name>", // required
   "repo_url": "<optional>",
   "ref": "<optional branch/ref>",
-  "title": "<DISTILLED claim — NOT a raw source title>",   // required
-  "content": "<why/how prose>",                    // required
-  "provenance": {                                   // required
-    "source": "<source label>",                    // required
+  "title": "<DISTILLED claim — NOT a raw source title>", // required
+  "content": "<why/how prose>", // required
+  "provenance": {
+    // required
+    "source": "<source label>", // required
     "url": "<optional>",
     "date": "<optional YYYY-MM-DD>",
     "commit": "<optional>",
     "version": "<optional>",
     "validated_against": "<optional free-text>",
-    "classification": {                             // required — all 7 dims
+    "classification": {
+      // required — all 7 dims
       "sensitivity": "public | internal | proprietary | secret",
       "knowledge_type": "architecture | design-rationale | root-cause | ownership | operational | protocol | security | process | product | gtm | org-culture",
-      "audience": "<free string, e.g. all-staff | engineering | gtm>",  // defaults to "all-staff"
+      "audience": "<free string, e.g. all-staff | engineering | gtm>", // defaults to "all-staff"
       "validation_status": "unverified | source-verified | showcase-verified",
       "confidence": "high | medium | low",
       "provenance_class": "primary | derived",
-      "freshness": { "as_of": "YYYY-MM-DD", "re_verify_by": "YYYY-MM-DD (optional)" }
-    }
+      "freshness": {
+        "as_of": "YYYY-MM-DD",
+        "re_verify_by": "YYYY-MM-DD (optional)",
+      },
+    },
   },
-  "evidence": [ /* zero or more, kind-discriminated — see below */ ],
-  "needsReview": false,                             // episodic ⇒ true
-  "validationTargets": [ "<symbol-or-repo-relative-path>", "..." ]
+  "evidence": [
+    /* zero or more, kind-discriminated — see below */
+  ],
+  "needsReview": false, // episodic ⇒ true
+  "validationTargets": ["<symbol-or-repo-relative-path>", "..."],
 }
 ```
 
@@ -143,54 +150,142 @@ Rules the leaf must honor (the adapters enforce these — match them):
 
 ---
 
+## Phase-0 schema-enforced write path
+
+<!-- atlas-phase-0-bridge: this section can be removed once Phase 1 lands and auto-generated schema replaces the hand-written field list -->
+
+In Phase 0 the leaf no longer writes its own JSON file via `fs.writeFile`. The
+orchestration shell composes the fragment object (or the harness produces it
+via `agent(prompt, {schema})` structured output) and pipes it to the canonical
+write CLI:
+
+```
+echo "$fragment_json" | atlas harvest write-fragment \
+  --run-id <run-id> --runs-dir <dir> [--stem <stem>] --stdin
+```
+
+The CLI validates against `CandidateFragmentSchema` (or
+`EpisodicCandidateFragmentSchema` when `sourcetype === "episodic"`, which
+layers the four episodic-invariant refinements on top of the base), then
+writes the validated fragment EXCLUSIVELY to
+`<runs-dir>/<run-id>/fragments/<stem>.json`. `--stem` is OPTIONAL — when
+omitted, the CLI derives the stem from the fragment's canonical-key components
+(`claimSlug(<sourcetype>:<subsystem>:claimSlug(claimSlugHint || title))` —
+`claimSlugHint` is optional, so the CLI falls back to the fragment `title`
+when no hint is supplied), so the derived stem stays the same across runs for
+the same fragment.
+
+Exit-code matrix (spec §4.2.1):
+
+- `0` — success (fragment written; absolute path printed to stdout)
+- `1` — stdin/IO failure (bad JSON, unreadable stdin, write error other than EEXIST)
+- `2` — stem collision (file already exists)
+- `3` — schema validation failure (base `CandidateFragmentSchema` rejected the input)
+- `4` — episodic invariant violation (one of `needsReview`/`provenance_class`/`confidence`/`validation_status`)
+
+Phase 0 ships the CLI; the HOW-TO-WRITE-JSON section above is unchanged and
+leaves can still emit JSON in their reports the same way. Phase 1 (a separate
+PR) rewrites this prompt to auto-generate the schema block from the Zod source
+in `src/atlas/types.ts`, eliminating the hand-written field list as the
+single source of truth.
+
+---
+
 ## Per-family `*Unit` input shapes (what you assemble in STEP 2)
 
 These are the exact adapter input shapes (from `src/atlas/adapters/*.ts`).
 
 **memory** (`MemoryFileUnit`):
+
 ```jsonc
-{ "filename": "memory/feedback_nextjs_bundles_node_modules.md", "contents": "<full file: frontmatter + body>" }
+{
+  "filename": "memory/feedback_nextjs_bundles_node_modules.md",
+  "contents": "<full file: frontmatter + body>",
+}
 ```
 
 **github-pr** (`GitHubPullRequestUnit`):
+
 ```jsonc
 {
   "kind": "pull_request",
   "sourceName": "github-pr:CopilotKit/pathfinder#1746",
-  "repo": { "fullName": "CopilotKit/pathfinder", "cloneUrl": "https://github.com/CopilotKit/pathfinder.git", "defaultBranch": "main" },
-  "pullRequest": { "number": 1746, "title": "...", "body": "...", "htmlUrl": "https://github.com/.../pull/1746",
-                   "mergeCommitSha": "...", "baseRef": "main", "headRef": "...", "author": "...", "mergedBy": "..." },
-  "changedFiles": ["src/db/atlas.ts"], "linkedIssues": ["https://github.com/.../issues/1732"], "reviewThreads": ["..."]
+  "repo": {
+    "fullName": "CopilotKit/pathfinder",
+    "cloneUrl": "https://github.com/CopilotKit/pathfinder.git",
+    "defaultBranch": "main",
+  },
+  "pullRequest": {
+    "number": 1746,
+    "title": "...",
+    "body": "...",
+    "htmlUrl": "https://github.com/.../pull/1746",
+    "mergeCommitSha": "...",
+    "baseRef": "main",
+    "headRef": "...",
+    "author": "...",
+    "mergedBy": "...",
+  },
+  "changedFiles": ["src/db/atlas.ts"],
+  "linkedIssues": ["https://github.com/.../issues/1732"],
+  "reviewThreads": ["..."],
 }
 ```
 
 **github-issue** (`GitHubIssueUnit`):
+
 ```jsonc
 {
   "kind": "issue",
   "sourceName": "github-issue:CopilotKit/pathfinder#1732",
   "repo": { "fullName": "...", "cloneUrl": "...", "defaultBranch": "main" },
-  "issue": { "number": 1732, "title": "...", "body": "...", "htmlUrl": "...", "author": "...", "state": "closed" },
-  "linkedIssues": [], "reviewThreads": []
+  "issue": {
+    "number": 1732,
+    "title": "...",
+    "body": "...",
+    "htmlUrl": "...",
+    "author": "...",
+    "state": "closed",
+  },
+  "linkedIssues": [],
+  "reviewThreads": [],
 }
 ```
 
 **notion-doc** (`NotionPageUnit`):
+
 ```jsonc
 {
-  "url": "https://www.notion.so/...", "title": "Interrupts Proposal — Design Decisions",
-  "subsystem": "agui-protocol", "repo_url": "<optional>", "ref": "<optional>", "date": "2026-05-20",
-  "sections": [ { "heading": "Decision 1: Resume tokens are opaque", "body": "..." }, { "heading": "Context", "body": "..." } ]
+  "url": "https://www.notion.so/...",
+  "title": "Interrupts Proposal — Design Decisions",
+  "subsystem": "agui-protocol",
+  "repo_url": "<optional>",
+  "ref": "<optional>",
+  "date": "2026-05-20",
+  "sections": [
+    { "heading": "Decision 1: Resume tokens are opaque", "body": "..." },
+    { "heading": "Context", "body": "..." },
+  ],
 }
 ```
+
 (The adapter splits on decision headings: `Decision …`, `ADR …`, `N. …`. Non-decision sections like Context are page-level only.)
 
 **linear-doc** (`LinearDocUnit`):
+
 ```jsonc
 {
-  "url": "https://linear.app/...", "title": "...", "problem": "...", "why": "...",
-  "nonGoals": ["..."], "citedFiles": ["src/..."], "notionCrossLink": "<optional Notion url>",
-  "subsystem": "runtime", "area": "<optional>", "updatedAt": "2026-05-30", "knowledgeType": "ownership"
+  "url": "https://linear.app/...",
+  "title": "...",
+  "problem": "...",
+  "why": "...",
+  "nonGoals": ["..."],
+  "citedFiles": ["src/..."],
+  "notionCrossLink": "<optional Notion url>",
+  "subsystem": "runtime",
+  "area": "<optional>",
+  "updatedAt": "2026-05-30",
+  "knowledgeType": "ownership",
 }
 ```
 
@@ -198,25 +293,49 @@ These are the exact adapter input shapes (from `src/atlas/adapters/*.ts`).
 invariants (`needsReview: true`, `validation_status: "unverified"`,
 `provenance_class: "derived"`, `confidence: "low"` clamped, `sensitivity`
 floored at `"internal"` preserving any stronger signal):
+
 ```jsonc
-{ "convPath": "<session jsonl path or link>", "date": "2026-06-07", "text": "<raw transcript window>", "subsystem": "<optional hint>" }
+{
+  "convPath": "<session jsonl path or link>",
+  "date": "2026-06-07",
+  "text": "<raw transcript window>",
+  "subsystem": "<optional hint>",
+}
 ```
 
 **agent-doc / source-comment** (`SourceCommentUnit`):
+
 ```jsonc
 {
   "filePath": "packages/react-core/src/use-coagent-state-render-bridge.tsx",
-  "lineStart": 24, "lineEnd": 45,
-  "commentText": "<the design-block comment>", "codeRegion": "<the annotated code>",
-  "subsystem": "react-core", "repoUrl": "<optional>", "ref": "<optional>", "sourceUrl": "<optional GitHub blob #Lx-Ly>"
+  "lineStart": 24,
+  "lineEnd": 45,
+  "commentText": "<the design-block comment>",
+  "codeRegion": "<the annotated code>",
+  "subsystem": "react-core",
+  "repoUrl": "<optional>",
+  "ref": "<optional>",
+  "sourceUrl": "<optional GitHub blob #Lx-Ly>",
 }
 ```
 
 **derived / showcase** (`ShowcaseUnit`):
+
 ```jsonc
 {
-  "manifest": { "integration": "langgraph-python", "name": "LangGraph (Python)", "repo_url": "<optional>", "description": "<optional>", "features": ["agentic-chat", "gen-ui"] },
-  "registry": { "version": "1", "categories": [ { "id": "...", "pills": [ { "id": "agentic-chat", "status": "green" } ] } ] }
+  "manifest": {
+    "integration": "langgraph-python",
+    "name": "LangGraph (Python)",
+    "repo_url": "<optional>",
+    "description": "<optional>",
+    "features": ["agentic-chat", "gen-ui"],
+  },
+  "registry": {
+    "version": "1",
+    "categories": [
+      { "id": "...", "pills": [{ "id": "agentic-chat", "status": "green" }] },
+    ],
+  },
 }
 ```
 
@@ -290,8 +409,14 @@ symbol as a `validationTarget`:
     }
   },
   "evidence": [
-    { "kind": "changed_file", "path": "packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45" },
-    { "kind": "fused_from", "ref": "source-comment:packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45" }
+    {
+      "kind": "changed_file",
+      "path": "packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45"
+    },
+    {
+      "kind": "fused_from",
+      "ref": "source-comment:packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45"
+    }
   ],
   "needsReview": false,
   "validationTargets": ["useCoagentStateRenderBridge"]