From 20073199eba264d772d9af92708d208f2aa74169 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 12 Jun 2026 11:36:52 -0700 Subject: [PATCH 1/6] Add canonicalizeFragment utility for fragment shadow-gate comparison --- .../atlas-canonicalize-fragment.test.ts | 135 ++++++++++++++++++ src/atlas/canonicalize.ts | 72 ++++++++++ 2 files changed, 207 insertions(+) create mode 100644 src/__tests__/atlas-canonicalize-fragment.test.ts diff --git a/src/__tests__/atlas-canonicalize-fragment.test.ts b/src/__tests__/atlas-canonicalize-fragment.test.ts new file mode 100644 index 0000000..8238aea --- /dev/null +++ b/src/__tests__/atlas-canonicalize-fragment.test.ts @@ -0,0 +1,135 @@ +import { describe, it, expect } from "vitest"; +import { canonicalizeFragment } from "../atlas/canonicalize.js"; + +// Tests for the §6.2 `canonicalizeFragment` JSON-stringify normalizer used by +// the Phase-2 dual-run shadow comparator. The function is named +// `canonicalizeFragment` (not `canonicalize`) inside src/atlas/canonicalize.ts +// because that module already exports the Tier-3 ranker `canonicalize` +// (different signature, different role). See SLOT-1 of the implementation plan. + +describe("canonicalizeFragment — §6.2 normalizer", () => { + // ── T1: recursive key-sort stability ──────────────────────────────────────── + // Two objects whose keys differ only by INSERTION ORDER must canonicalize to + // byte-identical JSON.stringify output. The dual-run comparator relies on + // `JSON.stringify(canonicalizeFragment(a)) === JSON.stringify(canonicalizeFragment(b))` + // (spec §6.2), so the canonical output must be stable regardless of which + // order the model emitted the keys. + it("T1: emits stable JSON-stringified output across key-permuted inputs", () => { + // Two CandidateFragment-shaped objects whose top-level AND nested keys are + // permuted between the two literals. Same field values; different order. 
+ const a = { + sourcetype: "github" as const, + subsystem: "atlas", + title: "Schema enforcement at the leaf boundary", + content: "Hello world.", + claimSlugHint: "schema-enforcement", + evidence: [ + { kind: "url", ref: "https://example.com/a" }, + { kind: "url", ref: "https://example.com/b" }, + ], + provenance: { + date: "2026-06-12", + classification: { + knowledge_type: "behavior", + provenance_class: "primary", + validation_status: "showcase-verified", + confidence: "high", + }, + }, + }; + + const b = { + // Top-level keys permuted. + title: "Schema enforcement at the leaf boundary", + provenance: { + // Nested keys permuted. + classification: { + confidence: "high", + validation_status: "showcase-verified", + provenance_class: "primary", + knowledge_type: "behavior", + }, + date: "2026-06-12", + }, + // evidence kept in the SAME element order (arrays are positional). + evidence: [ + { ref: "https://example.com/a", kind: "url" }, + { ref: "https://example.com/b", kind: "url" }, + ], + claimSlugHint: "schema-enforcement", + content: "Hello world.", + subsystem: "atlas", + sourcetype: "github" as const, + }; + + const canonA = JSON.stringify(canonicalizeFragment(a)); + const canonB = JSON.stringify(canonicalizeFragment(b)); + expect(canonA).toBe(canonB); + // Also assert deep-equal as a redundant structural check. + expect(canonicalizeFragment(a)).toEqual(canonicalizeFragment(b)); + }); + + // ── T2: whitespace normalization + numeric round-trip + array order ──────── + // §6.2(b): strings trim + collapse internal whitespace runs (including + // newlines and tabs) to a single space — explicitly lossy on free-text fields. + // §6.2(c): numeric round-trip so `1.0 ≡ 1`. + // §6.2(d): arrays are NOT sorted — element order is load-bearing for + // `evidence[]` and must be preserved positionally. 
+ it("T2: normalizes whitespace lossily, round-trips numerics, preserves array order", () => { + // Whitespace: leading + trailing trim, internal runs (spaces + newlines + + // tabs) collapse to ONE space. + const out = canonicalizeFragment({ + content: " hello\n\n world ", + title: "foo bar", + provenance: { + // Nested string also normalized. + note: "alpha\n\tbeta", + }, + }) as { + content: string; + title: string; + provenance: { note: string }; + }; + expect(out.content).toBe("hello world"); + expect(out.title).toBe("foo bar"); + expect(out.provenance.note).toBe("alpha beta"); + + // Whitespace-only string collapses to empty (trim removes everything). + const ws = canonicalizeFragment({ s: " \n\t " }) as { s: string }; + expect(ws.s).toBe(""); + + // Numeric canonicalization: 1.0 and 1 must compare equal after canonicalize. + const n1 = JSON.stringify(canonicalizeFragment({ x: 1.0 })); + const n2 = JSON.stringify(canonicalizeFragment({ x: 1 })); + expect(n1).toBe(n2); + // Non-integer numerics also round-trip stably. + const n3 = JSON.stringify(canonicalizeFragment({ x: 1.5 })); + const n4 = JSON.stringify(canonicalizeFragment({ x: 1.5 })); + expect(n3).toBe(n4); + + // Array order PRESERVED — ["a","b"] must NOT canonicalize equal to ["b","a"]. + const ab = JSON.stringify(canonicalizeFragment({ arr: ["a", "b"] })); + const ba = JSON.stringify(canonicalizeFragment({ arr: ["b", "a"] })); + expect(ab).not.toBe(ba); + + // Same array order DOES canonicalize equal, even with nested objects whose + // keys are permuted. 
+ const ev1 = JSON.stringify( + canonicalizeFragment({ + evidence: [ + { kind: "url", ref: "x" }, + { kind: "url", ref: "y" }, + ], + }), + ); + const ev2 = JSON.stringify( + canonicalizeFragment({ + evidence: [ + { ref: "x", kind: "url" }, + { ref: "y", kind: "url" }, + ], + }), + ); + expect(ev1).toBe(ev2); + }); +}); diff --git a/src/atlas/canonicalize.ts b/src/atlas/canonicalize.ts index ef55cce..6e0e870 100644 --- a/src/atlas/canonicalize.ts +++ b/src/atlas/canonicalize.ts @@ -302,3 +302,75 @@ export function canonicalize(fragments: CandidateFragment[]): Candidate[] { }); return candidates; } + +// ── canonicalizeFragment ────────────────────────────────────────────────────── +// +// Per-fragment structural normalizer for the Phase-2 dual-run shadow comparator +// (spec §6.2). Two fragments are considered "the same" iff +// `JSON.stringify(canonicalizeFragment(a)) === JSON.stringify(canonicalizeFragment(b))`, +// so the output must be stable under input variations that the comparator +// does NOT care about: object-key insertion order, whitespace inside free-text +// fields, and `1.0` vs `1` numeric encoding from the model. +// +// Distinct from the Tier-3 `canonicalize(fragments[])` ranker above — that one +// dedups+ranks an ARRAY of CandidateFragments, this one structurally normalizes +// ONE fragment-shaped object for byte-equality comparison. The two names +// intentionally diverge from the spec's `canonicalize(fragment)` so they can +// coexist in this module without shadowing each other; T1, T2 and T10 import +// `canonicalizeFragment` by name (see SLOT-1 of the impl plan, spec footer). +// +// Behavior (§6.2): +// (a) Recursive object-key sort — keys at every depth emit in sorted order. +// (b) String-field whitespace normalization — trim leading/trailing +// whitespace, then collapse every internal run of whitespace + newlines +// (\s+) to a single space. 
/**
 * Structurally normalizes ONE fragment-shaped object so that two fragments
 * can be compared with
 * `JSON.stringify(canonicalizeFragment(a)) === JSON.stringify(canonicalizeFragment(b))`
 * (spec §6.2).
 *
 * Normalization rules:
 *  - (a) Object keys are emitted in sorted order at every depth.
 *  - (b) Strings are trimmed and every internal whitespace run (`\s+`, which
 *        includes newlines and tabs) collapses to a single space. This is
 *        intentionally LOSSY on free-text fields.
 *  - (c) Numbers are returned as-is. JavaScript has a single number type, so
 *        `1.0`, `1` and `1.00` are already the same value once parsed and
 *        stringify identically. Non-finite values (NaN, ±Infinity) are passed
 *        through; `JSON.stringify` emits them as `null`.
 *  - (d) Arrays keep their element order (it is load-bearing for
 *        `evidence[]`); each element is normalized recursively.
 *
 * Pure: the input is never mutated — every array and object in the result is
 * newly allocated.
 *
 * @param fragment - Fragment-shaped value to normalize. Presumably
 *   JSON-shaped data (plain objects, arrays, primitives); class instances
 *   such as `Date` are reduced to their own enumerable properties.
 * @returns A normalized copy suitable for `JSON.stringify` comparison.
 */
export function canonicalizeFragment(fragment: object): object {
  return canonicalizeValue(fragment) as object;
}

/** Recursive worker behind {@link canonicalizeFragment}; handles one value. */
function canonicalizeValue(value: unknown): unknown {
  if (typeof value === "string") {
    // §6.2(b): trim, then collapse whitespace runs (spaces, tabs, newlines,
    // CR, vertical tab, form feed) to ONE space.
    return value.trim().replace(/\s+/g, " ");
  }
  if (typeof value === "number") {
    // §6.2(c): nothing to rewrite — a parsed `1.0` is already the number `1`.
    return value;
  }
  if (Array.isArray(value)) {
    // §6.2(d): positional order is preserved; only the elements are normalized.
    return value.map((element) => canonicalizeValue(element));
  }
  if (value !== null && typeof value === "object") {
    // §6.2(a): sort by UTF-16 code unit (locale-independent, so the order is
    // identical on every machine), then rebuild.
    //
    // Object.fromEntries defines each key as an own data property. Plain
    // `out[k] = v` assignment would route a "__proto__" key (which JSON.parse
    // happily produces) through the prototype setter and silently drop it
    // from the stringified output.
    //
    // Note: integer-like keys ("0", "12") always enumerate first in ascending
    // numeric order regardless of insertion order; that is still
    // deterministic, so the stringified output stays stable.
    const sorted = Object.entries(value as Record<string, unknown>).sort(
      ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0),
    );
    return Object.fromEntries(
      sorted.map(([key, child]) => [key, canonicalizeValue(child)]),
    );
  }
  // booleans, null, undefined, bigint, symbol, functions → unchanged.
  return value;
}
// Base fixture factory: returns a fresh, structurally-valid episodic fragment
// on every call, with every episodic invariant already satisfied
// (needsReview=true, provenance_class=derived, confidence=low,
// validation_status=unverified, sensitivity=internal). Each test clones via a
// new call and mutates ONE field so the failure cause is unambiguous.
const baseEpisodic = () => {
  // Classification block carrying the episodic clamps.
  const classification = {
    sensitivity: "internal" as const,
    knowledge_type: "process" as const,
    audience: "all-staff",
    validation_status: "unverified" as const,
    confidence: "low" as const,
    provenance_class: "derived" as const,
    freshness: { as_of: "2026-06-12" },
  };

  const provenance = {
    source: "episodic-session",
    classification,
  };

  return {
    sourcetype: "episodic" as const,
    subsystem: "agent-orchestration",
    source_name: "session-2026-06-12",
    title:
      "Blitz manifest decomposition is the orchestrator's job, not the executor's",
    content:
      "When the user invokes a blitz, the orchestrator (not a sub-agent) decomposes the plan into Depends-annotated slot tasks. Executors receive a single pre-computed slot and never see the manifest.",
    provenance,
    evidence: [],
    needsReview: true,
    validationTargets: [],
  };
};
EpisodicCandidateFragmentSchema.parse(input); + expect(parsed.provenance.classification.sensitivity).toBe("proprietary"); + }); + + it("preserves sensitivity=secret verbatim (strongest)", () => { + const input = baseEpisodic(); + input.provenance.classification.sensitivity = "secret" as "internal"; + const parsed = EpisodicCandidateFragmentSchema.parse(input); + expect(parsed.provenance.classification.sensitivity).toBe("secret"); + }); + + it("does not mutate the caller's input when coercing sensitivity to floor", () => { + // Regression: a `.transform` that writes through `f.provenance.classification.sensitivity = ...` + // would mutate the caller's input. Zod actually rebuilds the object graph on parse, + // so the transform is non-mutating in practice. This test pins that empirical guarantee. + // If a future maintainer "optimizes" the transform to in-place mutation (or Zod's + // semantics change), this test catches it before the regression ships. + const input = baseEpisodic(); + input.provenance.classification.sensitivity = "public" as "internal"; + const snapshot = structuredClone(input); + const parsed = EpisodicCandidateFragmentSchema.parse(input); + expect(input).toEqual(snapshot); // input not mutated + expect(input.provenance.classification.sensitivity).toBe("public"); + expect(parsed.provenance.classification.sensitivity).toBe("internal"); // coerced on output + }); +}); + +describe("EpisodicCandidateFragmentSchema — predicate-refinement rejections", () => { + it("rejects needsReview=false (episodic must be needsReview=true)", () => { + const input = baseEpisodic(); + input.needsReview = false; + const result = EpisodicCandidateFragmentSchema.safeParse(input); + expect(result.success).toBe(false); + if (!result.success) { + const issue = result.error.issues.find((i) => + i.path.includes("needsReview"), + ); + expect(issue).toBeDefined(); + expect(issue!.message).toMatch(/needsReview/); + } + }); + + it("rejects provenance_class=primary (episodic must 
be derived)", () => { + const input = baseEpisodic(); + input.provenance.classification.provenance_class = "primary" as "derived"; + const result = EpisodicCandidateFragmentSchema.safeParse(input); + expect(result.success).toBe(false); + if (!result.success) { + const issue = result.error.issues.find((i) => + i.path.includes("provenance_class"), + ); + expect(issue).toBeDefined(); + expect(issue!.message).toMatch(/provenance_class/); + } + }); + + it("rejects confidence=high (episodic must be confidence=low)", () => { + const input = baseEpisodic(); + input.provenance.classification.confidence = "high" as "low"; + const result = EpisodicCandidateFragmentSchema.safeParse(input); + expect(result.success).toBe(false); + if (!result.success) { + const issue = result.error.issues.find((i) => + i.path.includes("confidence"), + ); + expect(issue).toBeDefined(); + expect(issue!.message).toMatch(/confidence/); + } + }); + + it("rejects validation_status=source-verified (episodic must be unverified)", () => { + const input = baseEpisodic(); + input.provenance.classification.validation_status = + "source-verified" as "unverified"; + const result = EpisodicCandidateFragmentSchema.safeParse(input); + expect(result.success).toBe(false); + if (!result.success) { + const issue = result.error.issues.find((i) => + i.path.includes("validation_status"), + ); + expect(issue).toBeDefined(); + expect(issue!.message).toMatch(/validation_status/); + } + }); +}); diff --git a/src/atlas/types.ts b/src/atlas/types.ts index b0b3ca6..722f8d8 100644 --- a/src/atlas/types.ts +++ b/src/atlas/types.ts @@ -147,6 +147,57 @@ export const CandidateFragmentSchema = CandidateFragmentObject.refine( SUBSYSTEM_NO_DELIMITER_ISSUE, ); +// ── EpisodicCandidateFragmentSchema (spec §4.6) ─────────────────────────────── +// +// A wrapping variant of `CandidateFragmentSchema` that enforces the five +// episodic-leaf invariants. 
// EpisodicCandidateFragmentSchema (spec §4.6)
//
// Wraps `CandidateFragmentSchema` with the five episodic-leaf invariants:
//   - four predicate refinements that REJECT on violation
//     (needsReview=true, provenance_class=derived, confidence=low,
//     validation_status=unverified), each reporting the path of the violated
//     field;
//   - one `.transform()` that COERCES sensitivity UP to the "internal" floor:
//     "public" becomes "internal", every other value is returned untouched.
//     This is "coerce up to floor", NOT "reject below floor".
//
// The transform is written without mutation: it returns a copy of the
// affected path instead of assigning through its argument, so its purity does
// not depend on Zod handing it a freshly rebuilt object graph.
//
// If you add a `.regex(...)`, `.refine(...)`, or `.transform(...)` here, update
// the §4.1.1 refinement-audit test (T9) — JSON Schema conversion silently
// drops these and they must be wired into the post-pass.
export const EpisodicCandidateFragmentSchema = CandidateFragmentSchema.refine(
  (f) => f.needsReview === true,
  {
    message: "episodic fragment requires needsReview=true",
    path: ["needsReview"],
  },
)
  .refine((f) => f.provenance.classification.provenance_class === "derived", {
    message: "episodic requires provenance_class=derived",
    path: ["provenance", "classification", "provenance_class"],
  })
  .refine((f) => f.provenance.classification.confidence === "low", {
    message: "episodic requires confidence=low (clamped)",
    path: ["provenance", "classification", "confidence"],
  })
  .refine(
    (f) => f.provenance.classification.validation_status === "unverified",
    {
      message: "episodic requires validation_status=unverified",
      path: ["provenance", "classification", "validation_status"],
    },
  )
  // Sensitivity-floor transform: "public" → "internal"; anything at or above
  // the floor passes through by reference.
  .transform((f) => {
    const { provenance } = f;
    const { classification } = provenance;
    if (classification.sensitivity !== "public") {
      return f;
    }
    // Annotated as `typeof f` so the schema's inferred output type stays
    // exactly what it was (no union, no widened literal).
    const floored: typeof f = {
      ...f,
      provenance: {
        ...provenance,
        classification: { ...classification, sensitivity: "internal" },
      },
    };
    return floored;
  });

export type EpisodicCandidateFragment = z.infer<
  typeof EpisodicCandidateFragmentSchema
>;
-5931,12 +5934,12 @@ } }, "node_modules/zod-to-json-schema": { - "version": "3.25.1", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", - "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "version": "3.25.2", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.2.tgz", + "integrity": "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==", "license": "ISC", "peerDependencies": { - "zod": "^3.25 || ^4" + "zod": "^3.25.28 || ^4" } } } diff --git a/package.json b/package.json index c5f3438..f856913 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,8 @@ "pgvector": "^0.2.0", "simple-git": "^3.27.0", "yaml": "^2.8.3", - "zod": "^3.23.8" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.25.2" }, "peerDependencies": { "@xenova/transformers": "^2.17.0", @@ -100,6 +101,8 @@ "@types/jsdom": "^28.0.1", "@types/node": "^25.0.6", "@types/pg": "^8.11.10", + "ajv": "^8.20.0", + "ajv-formats": "^3.0.1", "jsdom": "^28.0.0", "tsx": "^4.21.0", "typescript": "^5.9.3", diff --git a/src/__tests__/atlas-json-schema.test.ts b/src/__tests__/atlas-json-schema.test.ts new file mode 100644 index 0000000..82d1f78 --- /dev/null +++ b/src/__tests__/atlas-json-schema.test.ts @@ -0,0 +1,188 @@ +// T3 — Atlas JSON Schema derivation tests (spec §4.1, §7.1, §7.3). +// +// Verifies the family-picker + the two derived JSON Schema documents in +// `src/atlas/json-schema.ts`: +// +// 1. Each derived schema has the expected top-level structure (`object` +// type at the schema root, regardless of whether zod-to-json-schema +// wraps it in `definitions`). +// 2. `jsonSchemaForFamily("episodic")` returns the episodic schema by +// reference; every other `SourceType` returns the base schema. +// 3. Conformance test — a known-good fragment validates against the +// derived JSON Schema via ajv. 
This guards against silent prop-drop +// in the zod-to-json-schema converter (any required property the +// converter loses would cause the validator to accept a malformed +// fragment OR reject a valid one). +// +// ajv is a JSON Schema validator (Draft-07 / 2019-09 / 2020-12); the +// zod-to-json-schema output targets Draft-07 by default, which ajv@8 +// supports natively. ajv-formats wires the standard format keywords +// (`uri`, `date-time`, etc.) even though we don't currently use them on +// the fragment — added defensively so a future `.regex(...)` / format +// constraint added to types.ts surfaces immediately. + +import { describe, it, expect } from "vitest"; +// ajv + ajv-formats ship CJS default exports; under our ESM `"type": "module"` +// + `verbatimModuleSyntax` config, the runtime value lives on `.default` while +// the type still resolves through the namespace import. Pull both off `.default`. +import * as ajvNs from "ajv"; +import * as ajvFormatsNs from "ajv-formats"; +const Ajv = (ajvNs as unknown as { default: typeof import("ajv").default }) + .default; +const addFormats = ( + ajvFormatsNs as unknown as { default: typeof import("ajv-formats").default } +).default; + +import { + CANDIDATE_FRAGMENT_JSON_SCHEMA, + EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA, + jsonSchemaForFamily, +} from "../atlas/json-schema.js"; +import { + CandidateFragmentSchema, + type CandidateFragment, +} from "../atlas/types.js"; + +// A structurally-valid CandidateFragment matching the §9.3 contract. Mirrors +// the cross-source-subsystem fixture shape (notion-doc fragment from +// fixtures/atlas/aggregate/cross-source-subsystem.json) but inlined so the +// test does not depend on fixture-file path stability. 
+const KNOWN_GOOD_FRAGMENT: CandidateFragment = { + sourcetype: "notion-doc", + subsystem: "agui-protocol", + claimSlugHint: "interrupt-resume-keying", + source_name: "notion-doc", + repo_url: "https://github.com/ag-ui-protocol/ag-ui", + ref: "interrupts-adr", + title: "Interrupt resume links via interruptId, NOT parentRunId", + content: + "The Interrupts design decided a resume is linked to its interrupt via interruptId rather than parentRunId.", + provenance: { + source: "notion-doc", + url: "https://www.notion.so/copilotkit/Interrupts-Proposal-Design-Decisions-Reasoning", + date: "2026-04-18", + classification: { + sensitivity: "internal", + knowledge_type: "design-rationale", + audience: "engineering", + validation_status: "source-verified", + confidence: "high", + provenance_class: "primary", + freshness: { as_of: "2026-04-18", re_verify_by: "2026-09-18" }, + }, + }, + evidence: [ + { + kind: "thread", + body: "Interrupts Proposal — Design Decisions & Reasoning", + }, + ], + needsReview: false, + validationTargets: [], +}; + +// Sanity check — the inline fragment matches the Zod contract. If this fails, +// the conformance assertion below would test a different schema than the one +// the rest of the harvest pipeline parses (silent test rot). +describe("KNOWN_GOOD_FRAGMENT sanity", () => { + it("parses against the Zod CandidateFragmentSchema", () => { + expect(() => + CandidateFragmentSchema.parse(KNOWN_GOOD_FRAGMENT), + ).not.toThrow(); + }); +}); + +// `zod-to-json-schema` with `name: "..."` wraps the derived schema in a +// top-level `{ $ref: "#/definitions/", definitions: { : {...} } }` +// container. With `$refStrategy: "none"` the SUB-schemas are inlined, but +// the OUTER wrapper still exists. Both ajv and a manual structural check +// need to drill through `definitions[name]` to reach the actual schema body. 
/**
 * Returns the actual schema body of a derived JSON Schema document.
 *
 * When the document carries a `definitions` container with an own entry for
 * `name` (the wrapper shape the suite expects from `zod-to-json-schema` when
 * it is given a `name`), that entry is returned. Otherwise the document
 * itself is returned, which covers a converter version that stops wrapping.
 *
 * @param schema - The derived JSON Schema document.
 * @param name - The definition name the document was derived under.
 * @returns `schema.definitions[name]` when it is an own, object-valued entry;
 *   otherwise `schema` unchanged.
 */
function rootSchemaBody(
  schema: Record<string, unknown>,
  name: string,
): Record<string, unknown> {
  const defs = schema.definitions;
  if (
    defs !== null &&
    typeof defs === "object" &&
    // Own-property check: a bare `defs[name]` lookup would also match
    // inherited members such as "toString" or "constructor".
    Object.prototype.hasOwnProperty.call(defs, name)
  ) {
    const body = (defs as Record<string, unknown>)[name];
    if (body !== null && typeof body === "object") {
      return body as Record<string, unknown>;
    }
  }
  // Fallback if a future zod-to-json-schema version stops wrapping.
  return schema;
}
+ throw new Error( + `ajv rejected a known-good fragment: ${JSON.stringify(validate.errors, null, 2)}`, + ); + } + expect(ok).toBe(true); + }); +}); + +describe("EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA shape", () => { + it("derives an object schema (sub-shape inherited from base)", () => { + const schema = EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA as Record< + string, + unknown + >; + const body = rootSchemaBody(schema, "EpisodicCandidateFragment"); + expect(body.type).toBe("object"); + const properties = body.properties as Record; + expect(properties).toBeDefined(); + expect(Object.keys(properties)).toContain("sourcetype"); + expect(Object.keys(properties)).toContain("needsReview"); + }); +}); + +describe("jsonSchemaForFamily", () => { + it("returns the episodic schema for family=episodic", () => { + expect(jsonSchemaForFamily("episodic")).toBe( + EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA, + ); + }); + + it("returns the base schema for non-episodic families", () => { + // Cover several non-episodic SourceType values to guard against a future + // `if/else` ladder that special-cases more than just episodic. + for (const family of [ + "memory", + "github-pr", + "github-issue", + "notion-doc", + "linear-doc", + "agent-doc", + "derived", + ] as const) { + expect(jsonSchemaForFamily(family)).toBe(CANDIDATE_FRAGMENT_JSON_SCHEMA); + } + }); +}); diff --git a/src/atlas/json-schema.ts b/src/atlas/json-schema.ts new file mode 100644 index 0000000..77aaf5d --- /dev/null +++ b/src/atlas/json-schema.ts @@ -0,0 +1,90 @@ +// Atlas JSON Schema derivation (spec §4.1). +// +// The orchestration shell that fans out atlas harvest leaves passes a JSON +// Schema document to the harness `agent(prompt, {schema})` call so the model +// emits a structurally-valid CandidateFragment by construction (Route-B of +// spec §4). 
The atlas package owns the DERIVATION of that schema — a single +// source of truth wired to the Zod contract in `./types.ts` — so the shell +// can boot, call `jsonSchemaForFamily(family)`, and hand the result to the +// harness without re-implementing schema conversion. +// +// Per spec §4.1.1, Zod `.refine(...)` / `.transform(...)` constraints are +// silently dropped by `zod-to-json-schema` (they are runtime predicates, not +// structural). Two callouts: +// 1. The `subsystemHasNoDelimiter` refinement on `CandidateFragmentSchema` +// is dropped here; the post-pass Zod parse in +// `atlas harvest write-fragment --stdin` still rejects. +// 2. The four episodic predicate refinements on +// `EpisodicCandidateFragmentSchema` (needsReview, provenance_class, +// confidence, validation_status) and the sensitivity-floor +// `.transform()` are runtime-only and are silently dropped by +// `zod-to-json-schema`; they are re-applied by the post-pass +// `EpisodicCandidateFragmentSchema.parse(...)` in +// `atlas harvest write-fragment --stdin`. The derived JSON Schema +// therefore enforces only the base structural contract — the episodic +// clamps live in the Zod post-pass. + +import { zodToJsonSchema } from "zod-to-json-schema"; + +import { + CandidateFragmentSchema, + EpisodicCandidateFragmentSchema, + type CandidateFragment, +} from "./types.js"; + +// `sourcetype` is an inline enum on `CandidateFragmentObject` and is not +// re-exported as a named symbol from `./types.js`. Derive it from the +// inferred `CandidateFragment` type so this file stays in lock-step with the +// Zod contract (any addition to the enum surfaces here as a type error +// where `jsonSchemaForFamily` switches on it). +export type SourceType = CandidateFragment["sourcetype"]; + +// Base CandidateFragment JSON Schema (spec §4.1). +// +// `$refStrategy: "none"` inlines every sub-schema so the result is a single +// self-contained document with no `$ref` indirection. 
/**
 * Derives one self-contained JSON Schema document from a Zod schema.
 *
 * `$refStrategy: "none"` asks the converter to inline sub-schemas rather than
 * emit `$ref` indirection, so the harness receives a self-describing payload.
 */
function deriveInlineJsonSchema(
  zodSchema: Parameters<typeof zodToJsonSchema>[0],
  name: string,
): object {
  return zodToJsonSchema(zodSchema, { name, $refStrategy: "none" });
}

// Base CandidateFragment JSON Schema (spec §4.1).
export const CANDIDATE_FRAGMENT_JSON_SCHEMA: object = deriveInlineJsonSchema(
  CandidateFragmentSchema,
  "CandidateFragment",
);

// Episodic-narrowed CandidateFragment JSON Schema (spec §4.6).
//
// Per spec §4.1.1 the episodic refinements (needsReview=true,
// provenance_class=derived, confidence=low, validation_status=unverified) and
// the sensitivity-floor `.transform()` are runtime-only and do not survive
// conversion, so this document expresses only the base structural shape. The
// clamps are re-applied by the post-pass
// `EpisodicCandidateFragmentSchema.parse(...)` in
// `atlas harvest write-fragment --stdin` (spec §4.2.1, step 3).
export const EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA: object =
  deriveInlineJsonSchema(
    EpisodicCandidateFragmentSchema,
    "EpisodicCandidateFragment",
  );

/**
 * Family-picker for the harness `agent(prompt, {schema})` call.
 *
 * Maps a leaf family to the JSON Schema document the orchestration shell
 * should hand to the harness:
 *   - `"episodic"` → {@link EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA}
 *   - every other `SourceType` → {@link CANDIDATE_FRAGMENT_JSON_SCHEMA}
 *
 * The constant is returned by reference (no copy), so callers can compare the
 * result with `===`.
 *
 * Note: the returned schema is structural ONLY. The runtime-only Zod
 * refinements (subsystem-delimiter guard, episodic invariant clamps,
 * sensitivity-floor transform) are enforced by the
 * `atlas harvest write-fragment --stdin` Zod parse — see spec §4.1.1.
 *
 * @param family - The `sourcetype` of the leaf being dispatched.
 * @returns The JSON Schema document for that family.
 */
export function jsonSchemaForFamily(family: SourceType): object {
  if (family === "episodic") {
    return EPISODIC_CANDIDATE_FRAGMENT_JSON_SCHEMA;
  }
  return CANDIDATE_FRAGMENT_JSON_SCHEMA;
}
+// The dist build is assumed already done by
+// `npm run build` (test suite's standard prerequisite); a fast guard at the top
+// fails loud if dist/atlas-cli.js is missing rather than running tests against
+// a stale or absent build artifact.
+
+import { spawnSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
+import { z } from "zod";
+
+import { claimSlug } from "../atlas/canonicalize.js";
+import { isEpisodicInvariantIssue } from "../atlas/harvest-cli.js";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const REPO_ROOT = path.resolve(__dirname, "..", "..");
+const CLI_PATH = path.join(REPO_ROOT, "dist", "atlas-cli.js");
+
+// A baseline CandidateFragment that passes CandidateFragmentSchema. Tests
+// shallow-clone + mutate to produce schema-failing / episodic-failing inputs.
+function baseFragment(overrides: Record<string, unknown> = {}): unknown {
+  return {
+    sourcetype: "github-pr",
+    subsystem: "cpk-runtime",
+    claimSlugHint: "explicit-hint-wins",
+    source_name: "github-pr",
+    repo_url: "https://github.com/CopilotKit/CopilotKit",
+    ref: "main",
+    title: "Some distilled claim about the runtime",
+    content: "why/how prose",
+    provenance: {
+      source: "github-pr",
+      date: "2026-06-08",
+      classification: {
+        sensitivity: "internal",
+        knowledge_type: "architecture",
+        audience: "all-staff",
+        validation_status: "source-verified",
+        confidence: "high",
+        provenance_class: "primary",
+        freshness: { as_of: "2026-06-08" },
+      },
+    },
+    evidence: [],
+    needsReview: false,
+    validationTargets: [],
+    ...overrides,
+  };
+}
+
+// Run the CLI with the provided stdin and argv tail. Returns the raw spawn
+// result so each test can assert exit code + stderr/stdout shape.
+function runCli(args: string[], stdin: string) { + return spawnSync("node", [CLI_PATH, "harvest", "write-fragment", ...args], { + input: stdin, + encoding: "utf-8", + }); +} + +describe("atlas harvest write-fragment --stdin CLI (spec §4.2 / T8 + T11)", () => { + let runsDir: string; + const runId = "test-run"; + + beforeAll(() => { + // Fail loud if the dist build is missing — running these tests against an + // absent build artifact would be a silent green-on-nothing pass. + if (!fs.existsSync(CLI_PATH)) { + throw new Error( + `dist build is missing (${CLI_PATH}); run \`npm run build\` first`, + ); + } + }); + + beforeEach(() => { + runsDir = fs.mkdtempSync(path.join(os.tmpdir(), "atlas-wf-")); + }); + + afterEach(() => { + try { + fs.rmSync(runsDir, { recursive: true, force: true }); + } catch { + // Tempdir cleanup is best-effort; OS tempdir reaper handles leftovers. + } + }); + + // T8a — explicit --stem, valid input → exit 0, file present at expected path. + it("T8a: exits 0 and writes the fragment when --stem is explicit", () => { + const stem = "explicit-stem"; + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", stem], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(0); + const expected = path.join(runsDir, runId, "fragments", `${stem}.json`); + expect(fs.existsSync(expected)).toBe(true); + // stdout reports the absolute path the file was written to. + expect(result.stdout.trim()).toBe(path.resolve(expected)); + const written = JSON.parse(fs.readFileSync(expected, "utf-8")); + expect(written.sourcetype).toBe("github-pr"); + expect(written.subsystem).toBe("cpk-runtime"); + }); + + // T8b — no --stem, valid input → exit 0, file present at claimSlug-derived path. 
+ it("T8b: exits 0 and derives the stem from canonical-key components when --stem is omitted", () => { + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(0); + // The derived stem is claimSlug("::"), where + // the inner claim-slug comes from claimSlugHint (preferred) or title. + const inner = claimSlug("explicit-hint-wins"); + const expectedStem = claimSlug(`github-pr:cpk-runtime:${inner}`); + const expected = path.join( + runsDir, + runId, + "fragments", + `${expectedStem}.json`, + ); + expect(fs.existsSync(expected)).toBe(true); + expect(result.stdout.trim()).toBe(path.resolve(expected)); + }); + + // T8c — non-JSON stdin → exit 1, stderr mentions JSON. + it("T8c: exits 1 on un-parseable stdin", () => { + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "bad-json"], + "not-json{", + ); + expect(result.status).toBe(1); + expect(result.stderr.toLowerCase()).toMatch(/json/); + // The fragments dir for this run should not have been created on a bail + // BEFORE the schema step. + const fragsDir = path.join(runsDir, runId, "fragments"); + expect(fs.existsSync(fragsDir)).toBe(false); + }); + + // T8d — valid JSON, missing required field (no `content`) → exit 3. + it("T8d: exits 3 when the input fails CandidateFragmentSchema", () => { + const bad = baseFragment(); + delete (bad as { content?: unknown }).content; + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "schema-bad"], + JSON.stringify(bad), + ); + expect(result.status).toBe(3); + expect(result.stderr.toLowerCase()).toMatch(/schema|content/); + const expected = path.join(runsDir, runId, "fragments", "schema-bad.json"); + expect(fs.existsSync(expected)).toBe(false); + }); + + // T8e — episodic input with needsReview=false → exit 4 (episodic invariant). 
+  it("T8e: exits 4 on an episodic invariant violation (needsReview=false)", () => {
+    // An episodic fragment that satisfies the BASE schema but violates the
+    // episodic refinements: needsReview must be true, provenance_class must be
+    // "derived", confidence must be "low", validation_status must be
+    // "unverified". We flip needsReview only — the rest are episodic-shaped
+    // already — so the first failing invariant is needsReview.
+    const episodic = baseFragment({
+      sourcetype: "episodic",
+      needsReview: false, // ← the failing invariant
+      provenance: {
+        source: "episodic",
+        date: "2026-06-08",
+        classification: {
+          sensitivity: "internal",
+          knowledge_type: "architecture",
+          audience: "all-staff",
+          validation_status: "unverified",
+          confidence: "low",
+          provenance_class: "derived",
+          freshness: { as_of: "2026-06-08" },
+        },
+      },
+    });
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "episodic-bad"],
+      JSON.stringify(episodic),
+    );
+    expect(result.status).toBe(4);
+    expect(result.stderr.toLowerCase()).toMatch(/needsreview|episodic/);
+    const expected = path.join(
+      runsDir,
+      runId,
+      "fragments",
+      "episodic-bad.json",
+    );
+    expect(fs.existsSync(expected)).toBe(false);
+  });
+
+  // T11 — second write to same stem → exit 2 with EEXIST-style error.
+  it("T11: exits 2 on stem collision (second write to the same stem)", () => {
+    const args = [
+      "--run-id",
+      runId,
+      "--runs-dir",
+      runsDir,
+      "--stem",
+      "collide-me",
+    ];
+    const first = runCli(args, JSON.stringify(baseFragment()));
+    expect(first.status).toBe(0);
+    const second = runCli(args, JSON.stringify(baseFragment()));
+    expect(second.status).toBe(2);
+    // Tighten beyond `/already exists/i`: kernel mkdir-EEXIST text ALSO contains
+    // "file already exists", so a regression that re-collapses the mkdir+write
+    // try-blocks would silently pass with the loose regex. Pin to OUR exit-2
+    // wording — `${stem}.json already exists at ${filePath}` — which the kernel
+    // EEXIST string does NOT emit.
+    expect(second.stderr).toMatch(/\.json already exists at /);
+  });
+
+  // T-1 boundary tests: episodic + base-schema (invalid_type / invalid_enum_value)
+  // failures must route to exit 3, not exit 4. The exit-4 lane is reserved for
+  // refinement (code: "custom") issues from EpisodicCandidateFragmentSchema's
+  // `.refine(...)` calls. A wrong-typed `needsReview` (string instead of bool)
+  // surfaces as `invalid_type` from the BASE schema and is a schema-validation
+  // failure, NOT an episodic invariant violation.
+
+  // Build an episodic fragment that satisfies the four refinements (so the only
+  // failure surfaced is the caller-injected base-schema breakage).
+  function baseEpisodicFragment(
+    overrides: Record<string, unknown> = {},
+  ): Record<string, unknown> {
+    return {
+      sourcetype: "episodic",
+      subsystem: "cpk-runtime",
+      claimSlugHint: "episodic-claim",
+      source_name: "episodic",
+      repo_url: "https://github.com/CopilotKit/CopilotKit",
+      ref: "main",
+      title: "An episodic observation",
+      content: "why/how prose",
+      provenance: {
+        source: "episodic",
+        date: "2026-06-08",
+        classification: {
+          sensitivity: "internal",
+          knowledge_type: "architecture",
+          audience: "all-staff",
+          validation_status: "unverified",
+          confidence: "low",
+          provenance_class: "derived",
+          freshness: { as_of: "2026-06-08" },
+        },
+      },
+      evidence: [],
+      needsReview: true,
+      validationTargets: [],
+      ...overrides,
+    };
+  }
+
+  // T-1.a — episodic + needsReview as a string → invalid_type → exit 3 (NOT 4).
+  it("T-1.a: exits 3 when episodic needsReview is a string (base-schema invalid_type)", () => {
+    const bad = baseEpisodicFragment({ needsReview: "true" });
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t1a"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema/);
+  });
+
+  // T-1.b — episodic + confidence as a number → invalid_type → exit 3 (NOT 4).
+  it("T-1.b: exits 3 when episodic confidence is a number (base-schema invalid_type)", () => {
+    const bad = baseEpisodicFragment();
+    (
+      bad.provenance as { classification: Record<string, unknown> }
+    ).classification.confidence = 5;
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t1b"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema/);
+  });
+
+  // T-1.c — episodic + confidence as a non-enum string → invalid_enum_value → exit 3.
+  it("T-1.c: exits 3 when episodic confidence is a non-enum string (base-schema invalid_enum_value)", () => {
+    const bad = baseEpisodicFragment();
+    (
+      bad.provenance as { classification: Record<string, unknown> }
+    ).classification.confidence = "made-up";
+    const result = runCli(
+      ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t1c"],
+      JSON.stringify(bad),
+    );
+    expect(result.status).toBe(3);
+    expect(result.stderr.toLowerCase()).toMatch(/schema/);
+  });
+
+  // T-1.d (positive) — episodic fragments that satisfy the base schema but
+  // fail exactly ONE of the four `.refine(...)` invariants must each route to
+  // exit 4 with stderr naming the offending field. Parametrized over all four
+  // invariants — needsReview, provenance_class, confidence, validation_status
+  // — so a future regression that shrinks EPISODIC_INVARIANT_FIELDS (e.g. back
+  // to needsReview-only) is caught. Each case mutates `baseEpisodicFragment()`
+  // (which satisfies all four refinements) along exactly ONE axis.
+  it.each([
+    {
+      field: "needsReview",
+      stem: "t1d-needsreview",
+      mutate: (frag: Record<string, unknown>) => {
+        frag.needsReview = false;
+      },
+    },
+    {
+      field: "provenance_class",
+      stem: "t1d-provclass",
+      mutate: (frag: Record<string, unknown>) => {
+        (
+          frag.provenance as { classification: Record<string, unknown> }
+        ).classification.provenance_class = "primary";
+      },
+    },
+    {
+      field: "confidence",
+      stem: "t1d-confidence",
+      mutate: (frag: Record<string, unknown>) => {
+        (
+          frag.provenance as { classification: Record<string, unknown> }
+        ).classification.confidence = "high";
+      },
+    },
+    {
+      field: "validation_status",
+      stem: "t1d-valstatus",
+      mutate: (frag: Record<string, unknown>) => {
+        (
+          frag.provenance as { classification: Record<string, unknown> }
+        ).classification.validation_status = "source-verified";
+      },
+    },
+  ])(
+    "T-1.d: exits 4 when episodic violates the $field invariant (refinement custom-issue)",
+    ({ field, stem, mutate }) => {
+      const bad = baseEpisodicFragment();
+      mutate(bad);
+      const result = runCli(
+        ["--run-id", runId, "--runs-dir", runsDir, "--stem", stem],
+        JSON.stringify(bad),
+      );
+      expect(result.status).toBe(4);
+      // stderr must name the offending field — the ZodError's issue path
+      // includes the field name verbatim. Asserts that the gate genuinely
+      // covers THIS invariant (not just that exit 4 fired for some reason).
+      expect(result.stderr).toContain(field);
+      // Sanity: the "episodic invariant violation" label is always present.
+      expect(result.stderr.toLowerCase()).toContain("episodic invariant");
+    },
+  );
+
+  // M-2 — AND-case routing: a fragment that fails BOTH a base-schema
+  // constraint (confidence as number → invalid_type) AND an episodic
+  // refinement (needsReview=false → custom) must route to exit 3. Per
+  // spec §4.2.1, a base-schema failure means the fragment isn't valid
+  // CandidateFragment shape at all — the refinement verdict is moot, so
+  // exit 3 (base-schema) wins over exit 4 (refinement) in the AND case.
+ it("M-2: exits 3 when episodic input fails BOTH base-schema (confidence=number) AND a refinement (needsReview=false)", () => { + const bad = baseEpisodicFragment({ needsReview: false }); + ( + bad.provenance as { classification: Record } + ).classification.confidence = 5; + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "m2"], + JSON.stringify(bad), + ); + expect(result.status).toBe(3); + expect(result.stderr.toLowerCase()).toMatch(/expected|invalid|schema/); + }); + + // M-4 — `--stdin` is accepted as a no-op flag for spec-literal + // invocation compatibility. The literal invocation in §4.2.1 reads + // `atlas harvest write-fragment --run-id --fragment-id + // --stdin`, so the CLI must accept `--stdin` without erroring. stdin + // is always read regardless of the flag. + it("M-4: exits 0 when --stdin is passed as a no-op flag (spec-literal invocation)", () => { + const result = runCli( + [ + "--run-id", + runId, + "--runs-dir", + runsDir, + "--stem", + "m4-stdin", + "--stdin", + ], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(0); + const expected = path.join(runsDir, runId, "fragments", "m4-stdin.json"); + expect(fs.existsSync(expected)).toBe(true); + }); + + // T-5 — mkdir failure vs write EEXIST must be disambiguated. If the + // `//fragments` PATH already exists as a regular file, + // mkdirSync({recursive:true}) raises EEXIST. That is NOT a stem collision; + // it is an operator-environment problem (exit 1). Only an EEXIST from the + // write step (file at the resolved stem path exists) is a stem collision. + it("T-5: exits 1 when the fragments dir path is occupied by a regular file (mkdir-class failure, NOT exit 2)", () => { + // Pre-create `//fragments` as a file so mkdirSync trips + // EEXIST against a non-dir. 
+ const runDir = path.join(runsDir, runId); + fs.mkdirSync(runDir, { recursive: true }); + fs.writeFileSync(path.join(runDir, "fragments"), "occupied\n"); + + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "t5"], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(1); + // Error is named as a mkdir-class failure (not a write-class one) and + // identifies the fragments directory path. The exit-2 message format is + // `${stem}.json already exists at ${filePath}` — assert that the stem + // wording is absent so a regression that re-collapses the two try-blocks + // (mis-routing mkdir-EEXIST as a stem collision) is caught. + expect(result.stderr.toLowerCase()).toMatch(/mkdir/); + expect(result.stderr.toLowerCase()).toMatch(/fragments/); + expect(result.stderr).not.toMatch(/\.json already exists at /); + }); + + // T-R4-4 — `--stem` value is interpolated into a filesystem path. Without a + // filesystem-safe regex gate, `--stem ../../evil` yields a write OUTSIDE the + // fragments directory. Per spec §4.2.1 exit-code matrix, this is the + // operator/input class — exit 1 with stem-validation error wording, BEFORE + // the mkdir/write attempt. + it("T-R4-4: exits 1 when --stem contains path-traversal characters (../, /, leading dot, etc.)", () => { + // Spec §4.2.1: filesystem-safe stems only. Operator/LLM-generated stems with + // `../`, `/`, leading-dot, or other path-traversal sequences must be rejected + // BEFORE the mkdir/write attempt to prevent writes outside . + const traversalStem = "../../evil"; + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", traversalStem], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(1); + expect(result.stderr.toLowerCase()).toMatch(/stem|invalid|traversal/); + // The error message should name the stem-validation failure, not a mkdir/write error. 
+ expect(result.stderr).not.toMatch(/mkdir/); + expect(result.stderr).not.toMatch(/\.json already exists at /); + }); + + it("T-R4-4: accepts a filesystem-safe stem (alphanumeric + . _ -)", () => { + // Safe stem characters: A-Z a-z 0-9 . _ - (no path separators, no leading dot, + // no traversal sequences). + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "valid-stem.123_ok"], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(0); + }); + + // T-R5-2 — STEM_PATTERN negative-test coverage hardening. T-R4-4 covers ONE + // axis (leading-dot traversal). A regex weakening that admits `/` mid-string + // or other path-component shapes would not be caught by T-R4-4 alone. These + // three tests pin the additional STEM_PATTERN rejection axes so any future + // edit that broadens the character class produces a visible regression. + it("T-R5-2: exits 1 when --stem contains a mid-string path separator (foo/bar)", () => { + // Mid-string `/` is the most dangerous regex-weakening vector: a stem like + // `foo/bar` would write to `/foo/bar.json` and could be + // chained with `..` to escape. STEM_PATTERN's `[A-Za-z0-9._-]` body class + // does NOT include `/`, so this must reject. + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "foo/bar"], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(1); + expect(result.stderr.toLowerCase()).toMatch(/stem|invalid/); + expect(result.stderr).not.toMatch(/mkdir/); + }); + + it("T-R5-2: exits 1 when --stem has a leading path separator (/absolute/path)", () => { + // A leading `/` would resolve `path.join(fragmentsDir, "/absolute/path.json")` + // to an absolute escape. STEM_PATTERN's leading-character anchor requires + // `[A-Za-z0-9]`, so this must reject. 
+ const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "/absolute/path"], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(1); + expect(result.stderr.toLowerCase()).toMatch(/stem|invalid/); + expect(result.stderr).not.toMatch(/mkdir/); + }); + + it("T-R5-2: exits 1 when --stem has a leading double-dot (..foo)", () => { + // The leading-character anchor `[A-Za-z0-9]` excludes `.`, so a stem + // starting with `..` (the classic traversal prefix) is rejected by the + // leading-anchor — independently of any body-position `..` permissiveness. + const result = runCli( + ["--run-id", runId, "--runs-dir", runsDir, "--stem", "..foo"], + JSON.stringify(baseFragment()), + ); + expect(result.status).toBe(1); + expect(result.stderr.toLowerCase()).toMatch(/stem|invalid/); + expect(result.stderr).not.toMatch(/mkdir/); + }); +}); + +// Direct unit tests on the `isEpisodicInvariantIssue` predicate. These exist +// because the AND-case precedence rule (any non-custom issue downgrades the +// whole ZodError to exit 3) is not exercisable through the CLI integration +// path: Zod's base-parse short-circuits on `invalid_type` BEFORE refinements +// run, so a real fragment can never produce a mixed-code ZodError via the +// episodic-parse codepath. The predicate, however, is a defensive guard for +// the spec contract (§4.2.1) and any future code that COULD pass a mixed +// ZodError (e.g. a custom parse path that runs base + refinements together). +// Test (c) is the regression-armor for that contract. 
+describe("isEpisodicInvariantIssue: AND-case precedence direct unit tests", () => { + it("returns FALSE for a pure base-schema failure (invalid_type only)", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.invalid_type, + expected: "boolean", + received: "string", + path: ["needsReview"], + message: "Expected boolean, received string", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(false); + }); + + it("returns TRUE for a pure refinement failure (custom only, path matches EPISODIC_INVARIANT_FIELDS)", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.custom, + path: ["needsReview"], + message: "needsReview must be true when validation_status is pending", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(true); + }); + + it("returns FALSE for a mixed-code ZodError (custom + invalid_type) — base-schema wins per §4.2.1 precedence", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.custom, + path: ["needsReview"], + message: "needsReview must be true when validation_status is pending", + }, + { + code: z.ZodIssueCode.invalid_type, + expected: "number", + received: "string", + path: ["confidence"], + message: "Expected number, received string", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(false); + }); + + it("returns FALSE for a custom issue whose path-last is NOT in EPISODIC_INVARIANT_FIELDS", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.custom, + path: ["title"], + message: "title must not contain a subsystem delimiter", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(false); + }); + + // Positive per-invariant coverage. The existing (b) case above pins + // needsReview; the next three pin the remaining three EPISODIC_INVARIANT_FIELDS + // (provenance_class, confidence, validation_status). 
Path shapes mirror the + // refines on EpisodicCandidateFragmentSchema in src/atlas/types.ts — the three + // classification-nested refines emit FULL nested paths + // (["provenance","classification",]), while needsReview uses the + // single-element path. The predicate matches on path[path.length-1], so the + // leaf form would also suffice; using the actual refine path-shape keeps + // these tests faithful to what production ZodErrors look like. + it("returns TRUE for a pure custom failure on provenance_class invariant (nested path)", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.custom, + path: ["provenance", "classification", "provenance_class"], + message: "episodic requires provenance_class=derived", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(true); + }); + + it("returns TRUE for a pure custom failure on confidence invariant (nested path)", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.custom, + path: ["provenance", "classification", "confidence"], + message: "episodic requires confidence=low (clamped)", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(true); + }); + + it("returns TRUE for a pure custom failure on validation_status invariant (nested path)", () => { + const err = new z.ZodError([ + { + code: z.ZodIssueCode.custom, + path: ["provenance", "classification", "validation_status"], + message: "episodic requires validation_status=unverified", + }, + ]); + expect(isEpisodicInvariantIssue(err)).toBe(true); + }); + + // Defensive edge case: an empty-issues ZodError. The predicate's + // `issues.length === 0` early-return must hold — an empty ZodError carries + // no invariant signal and must NOT route to exit 4. Routes to exit 3 (base + // lane) by default per §4.2.1. 
+ it("returns FALSE for an empty-issues ZodError (defensive edge case)", () => { + const err = new z.ZodError([]); + expect(isEpisodicInvariantIssue(err)).toBe(false); + }); +}); diff --git a/src/atlas/harvest-cli.ts b/src/atlas/harvest-cli.ts index aaa7bdf..7cc9d8f 100644 --- a/src/atlas/harvest-cli.ts +++ b/src/atlas/harvest-cli.ts @@ -38,6 +38,13 @@ import { fileURLToPath } from "node:url"; import { Command, CommanderError, Option } from "commander"; import { Client } from "@notionhq/client"; +// ── Schemas (write-fragment subcommand) ──────────────────────────────────────── +import { + CandidateFragmentSchema, + EpisodicCandidateFragmentSchema, +} from "./types.js"; +import { claimSlug } from "./canonicalize.js"; + // ── The seven leaf adapters — imported HERE and nowhere else (assembly point) ── import { memoryAdapter } from "./adapters/memory.js"; import { githubAdapter } from "./adapters/github.js"; @@ -619,6 +626,266 @@ async function reindexCommand( ); } +// ── write-fragment subcommand (spec §4.2) ────────────────────────────────────── +// +// Read a single CandidateFragment JSON object from stdin, validate it against +// the appropriate family schema (`CandidateFragmentSchema` for non-episodic, +// `EpisodicCandidateFragmentSchema` for episodic — the episodic schema layers +// the four episodic-invariant refinements on top of the base), and write the +// validated (and possibly sensitivity-coerced) fragment EXCLUSIVELY to +// `//fragments/.json`. +// +// `--stem` is OPTIONAL: when omitted, the stem is derived from the fragment's +// canonical-key components (`claimSlug(::claimSlug(claimSlugHint || title))`) +// so two fragments with the same claim text but different sourcetype/subsystem +// don't collide. The derived stem is itself idempotent across the canonicalize +// path (claimSlug normalizes case/punctuation). 
+//
+// Exit-code matrix (spec §4.2.1):
+//   0 — success (fragment written; absolute path printed to stdout)
+//   1 — operator/IO failure (missing --run-id/--runs-dir, bad JSON, unreadable stdin, invalid stem, write error other than EEXIST)
+//   2 — stem collision (file already exists; exclusive-create fails with EEXIST)
+//   3 — schema validation failure (base CandidateFragmentSchema rejected the input,
+//       OR an episodic input whose Zod error path is NOT one of the four episodic
+//       invariants — i.e. a base-schema failure surfaced through the episodic parse)
+//   4 — episodic invariant violation (sourcetype === "episodic" AND the Zod error
+//       path identifies one of the four episodic invariants: needsReview,
+//       provenance_class, confidence, validation_status)
+//
+// The fail-loud rule: stderr always carries the underlying error message; the
+// exit code distinguishes the FAILURE CLASS so the caller (leaf adapter, CI
+// gate) can route accordingly.
+
+const EPISODIC_INVARIANT_FIELDS: ReadonlySet<string> = new Set([
+  "needsReview",
+  "provenance_class",
+  "confidence",
+  "validation_status",
+]);
+
+interface WriteFragmentCliOptions {
+  runId?: string;
+  runsDir?: string;
+  stem?: string;
+}
+
+// Read the entirety of process.stdin into a single utf-8 string. Bounded
+// only by available memory — fragments are small (a few KB each) so a full
+// read is fine; streaming-parse would add complexity for zero benefit.
+async function readAllStdin(): Promise<string> {
+  const chunks: Buffer[] = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
+  }
+  return Buffer.concat(chunks).toString("utf-8");
+}
+
+// Inspect a ZodError's issues and decide whether the parse failure is purely
+// an episodic-invariant refinement violation (exit 4) versus a base-schema
+// failure that surfaced through the episodic parse (exit 3).
+// The episodic
+// schema's refinement paths are authored explicitly (see
+// EpisodicCandidateFragmentSchema in types.ts); the four `.refine(...)` calls
+// all emit Zod issues with `code: "custom"` (the default for refinements).
+//
+// Routing rules (per spec §4.2.1):
+//   - Per-issue gate: only `code: "custom"` issues whose path-last lands on
+//     one of EPISODIC_INVARIANT_FIELDS are candidates for exit 4. invalid_type
+//     / invalid_enum_value / invalid_literal / unrecognized_keys etc. are
+//     base-schema issues and route to exit 3 even when they land on a
+//     refinement-named field.
+//   - AND-case precedence: if ANY issue in the same ZodError is a non-custom
+//     base-schema issue, the fragment isn't even valid CandidateFragment
+//     shape, so the refinement verdict is moot — route to exit 3. Exit 3
+//     ALWAYS wins over exit 4 when both apply.
+//
+// Exported for direct unit-testing of the AND-case precedence predicate;
+// production callers reach it through the write-fragment command body below.
+export function isEpisodicInvariantIssue(
+  error: unknown,
+): error is { issues: Array<{ path: (string | number)[]; message: string }> } {
+  if (!error || typeof error !== "object") return false;
+  const issues = (error as { issues?: unknown }).issues;
+  if (!Array.isArray(issues) || issues.length === 0) return false;
+  // AND-case precedence: any non-custom issue downgrades the whole ZodError
+  // to exit 3. A base-schema failure (invalid_type / invalid_enum_value /
+  // invalid_literal / unrecognized_keys / etc.) means the fragment isn't a
+  // valid CandidateFragment at all — the episodic-refinement verdict is moot.
+  if (issues.some((issue) => (issue as { code?: unknown }).code !== "custom")) {
+    return false;
+  }
+  // All issues are `code: "custom"`. EVERY one must point at an episodic
+  // invariant for this to route to exit 4. A custom issue whose path-last is
+  // NOT in EPISODIC_INVARIANT_FIELDS (e.g. the subsystem-delimiter refine on
+  // the base CandidateFragmentSchema) is a base-schema-class refinement and
+  // routes to exit 3 — even alongside invariant issues (exit 3 always wins).
+  return issues.every((issue) => {
+    const issuePath = (issue as { path?: (string | number)[] }).path;
+    if (!Array.isArray(issuePath) || issuePath.length === 0) return false;
+    const last = issuePath[issuePath.length - 1];
+    return typeof last === "string" && EPISODIC_INVARIANT_FIELDS.has(last);
+  });
+}
+
+// The write-fragment command body. Returns the exit code per §4.2.1; never
+// throws — all failure classes are routed through the exit-code matrix.
+export async function writeFragmentCommand(
+  options: WriteFragmentCliOptions,
+  writeOut: WriteFn,
+  writeErr: WriteFn,
+  stdinReader: () => Promise<string> = readAllStdin,
+): Promise<number> {
+  if (!options.runId) {
+    writeErr("atlas-harvest write-fragment: --run-id is required\n");
+    return 1;
+  }
+  if (!options.runsDir) {
+    writeErr("atlas-harvest write-fragment: --runs-dir is required\n");
+    return 1;
+  }
+
+  // 1. Read + JSON-parse stdin. Both stdin IO and JSON parse failures are
+  //    exit 1 (stdin/IO class).
+  let raw: string;
+  try {
+    raw = await stdinReader();
+  } catch (err) {
+    writeErr(
+      `atlas-harvest write-fragment: stdin read failed: ${formatCliError(err)}\n`,
+    );
+    return 1;
+  }
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (err) {
+    writeErr(
+      `atlas-harvest write-fragment: stdin JSON parse failed: ${formatCliError(err)}\n`,
+    );
+    return 1;
+  }
+
+  // 2. Pick schema family by the fragment's `sourcetype` field. Inspect
+  //    BEFORE parsing — we need the family to decide which schema to run and
+  //    which exit-code class (3 vs 4) a failure maps to.
+  const sourcetype =
+    parsed && typeof parsed === "object"
+      ? (parsed as { sourcetype?: unknown }).sourcetype
+      : undefined;
+  const isEpisodic = sourcetype === "episodic";
+  const schema = isEpisodic
+    ? EpisodicCandidateFragmentSchema
+    : CandidateFragmentSchema;
+
+  // 3. Parse against the chosen schema.
On failure: + // - non-episodic OR an episodic base-schema failure → exit 3 + // - episodic invariant refinement failure → exit 4 + const result = schema.safeParse(parsed); + if (!result.success) { + const exitCode = + isEpisodic && isEpisodicInvariantIssue(result.error) ? 4 : 3; + const label = + exitCode === 4 + ? "episodic invariant violation" + : "schema validation failure"; + writeErr( + `atlas-harvest write-fragment: ${label}: ${formatCliError(result.error)}\n`, + ); + return exitCode; + } + const fragment = result.data as { sourcetype: string; subsystem: string }; + + // 4. Resolve the stem — explicit `--stem` wins; otherwise derive from the + // fragment's canonical-key components (claimSlug normalizes the joined + // `claimSlug(::claimSlug(claimSlugHint || title))` + // to a filesystem-safe slug). + let stem: string; + if (options.stem !== undefined && options.stem !== "") { + stem = options.stem; + } else { + const fragWithClaim = result.data as { + sourcetype: string; + subsystem: string; + claimSlugHint?: string; + title: string; + }; + const claim = claimSlug(fragWithClaim.claimSlugHint || fragWithClaim.title); + stem = claimSlug( + `${fragWithClaim.sourcetype}:${fragWithClaim.subsystem}:${claim}`, + ); + } + + // 4a. Filesystem-safe stem gate (spec §4.2.1, T-R4-4, T-R5-2). `--stem` + // flows into `path.join(fragmentsDir, ...)` and an unvalidated value + // like `../../evil` writes OUTSIDE the fragments directory. The + // `STEM_PATTERN` regex below enforces: + // - First character must be alphanumeric `[A-Za-z0-9]`. This blocks + // leading-dot hidden-file values (`.hidden`), leading-dash + // flag-confusable values (`-flag`), AND any leading-`..` traversal + // prefix (because `.` is not in the leading char class). + // - Subsequent characters limited to `[A-Za-z0-9._-]`. Any path + // separator (`/`, `\`) is rejected because it's outside the body + // class — so a stem cannot construct a multi-component path at all. 
+ // Note: a substring `..` is permitted in the body (e.g. `foo..bar`), + // but is operationally safe — with no `/` separator available, it + // cannot construct a traversal sequence to escape `fragmentsDir`. + // This is the operator/input class — exit 1, BEFORE the mkdir/write + // attempt. + const STEM_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/; + if (!STEM_PATTERN.test(stem)) { + writeErr( + `atlas-harvest write-fragment: invalid stem "${stem}" — must match ${STEM_PATTERN}\n`, + ); + return 1; + } + + // 5. Write EXCLUSIVELY under `//fragments/.json`. + // The mkdir step and the write step are intentionally NOT collapsed into + // one try/catch — they have DIFFERENT exit-code classes: + // + // - mkdir failure (EEXIST against a non-dir path, EACCES, ENOSPC, ...) + // is an operator-environment problem and routes to exit 1. + // - writeFileSync EEXIST (file at the resolved stem path already + // exists) is the spec-intended "stem collision" case and routes to + // exit 2. + // - Any other writeFileSync failure (EACCES, ENOSPC, ...) is also + // exit 1. + // + // Collapsing them would mis-route mkdir-EEXIST to exit 2 and mis-label + // mkdir-class IO errors as "write failed" (wrong syscall name). 
+ const fragmentsDir = path.join(options.runsDir, options.runId, "fragments"); + const filePath = path.join(fragmentsDir, `${stem}.json`); + try { + fs.mkdirSync(fragmentsDir, { recursive: true }); + } catch (err) { + writeErr( + `atlas-harvest write-fragment: mkdir failed for fragments dir ${fragmentsDir}: ${formatCliError(err)}\n`, + ); + return 1; + } + try { + fs.writeFileSync(filePath, `${JSON.stringify(result.data, null, 2)}\n`, { + encoding: "utf-8", + flag: "wx", + }); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "EEXIST") { + writeErr( + `atlas-harvest write-fragment: ${stem}.json already exists at ${filePath}\n`, + ); + return 2; + } + writeErr( + `atlas-harvest write-fragment: write failed for ${filePath}: ${formatCliError(err)}\n`, + ); + return 1; + } + + writeOut(`${path.resolve(filePath)}\n`); + // Fragment received subsystem field — silence unused-var TS lint. + void fragment; + return 0; +} + // Format a CLI error for stderr, walking the `{cause}` chain (bounded depth). // Several pipeline failures deliberately attach the underlying error as // `cause` — e.g. rag-dedup's consecutive-probe fail-fast wraps the ACTUAL @@ -756,9 +1023,50 @@ export async function runAtlasHarvestCli( await reindexCommand(options, writeOut); }); + // The write-fragment subcommand has its OWN exit-code matrix (§4.2.1: 0/1/2/3/4) + // that the standard commander error path cannot express. The action closes over + // this slot and the outer return picks it up. + let writeFragmentExitCode: number | undefined; + + program + .command("write-fragment") + .description( + "Read a CandidateFragment from stdin and write it under " + + "//fragments/.json. When --stem is omitted, " + + "the stem is derived as " + + "claimSlug(::claimSlug(claimSlugHint || title)). 
" + + "Exit codes per spec §4.2.1: 0 ok, 1 stdin/IO, 2 stem collision, " + + "3 schema, 4 episodic invariant.", + ) + .requiredOption( + "--run-id ", + "Run id under which the fragment is written", + ) + .requiredOption( + "--runs-dir ", + "Root directory of run corpora (e.g. ./runs)", + ) + .option( + "--stem ", + "Filesystem-safe fragment stem; if omitted, derived as " + + "claimSlug(::claimSlug(claimSlugHint || title))", + ) + .option( + "--stdin", + "Read fragment from stdin (no-op; stdin is always read — accepted for " + + "spec-literal invocation compatibility, see §4.2.1)", + ) + .action(async (options: WriteFragmentCliOptions) => { + writeFragmentExitCode = await writeFragmentCommand( + options, + writeOut, + writeErr, + ); + }); + try { await program.parseAsync(argv, { from: "user" }); - return 0; + return writeFragmentExitCode ?? 0; } catch (error) { if (error instanceof CommanderError) { return error.exitCode; From 9c1dd5816b018c73c2c4563db639fbac9ffe98e5 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 12 Jun 2026 11:36:52 -0700 Subject: [PATCH 5/6] Add defect-regression corpus, Zod refinement coverage doc, and runDualRun shadow-gate scaffold --- docs/atlas/refinement-coverage.md | 64 +++ src/__tests__/atlas-defect-regression.test.ts | 387 ++++++++++++++++ src/__tests__/atlas-dual-run.test.ts | 424 ++++++++++++++++++ .../atlas-refinement-coverage.test.ts | 138 ++++++ src/atlas/dual-run.ts | 315 +++++++++++++ 5 files changed, 1328 insertions(+) create mode 100644 docs/atlas/refinement-coverage.md create mode 100644 src/__tests__/atlas-defect-regression.test.ts create mode 100644 src/__tests__/atlas-dual-run.test.ts create mode 100644 src/__tests__/atlas-refinement-coverage.test.ts create mode 100644 src/atlas/dual-run.ts diff --git a/docs/atlas/refinement-coverage.md b/docs/atlas/refinement-coverage.md new file mode 100644 index 0000000..36d5065 --- /dev/null +++ b/docs/atlas/refinement-coverage.md @@ -0,0 +1,64 @@ +--- +title: Atlas Zod 
refinement coverage +status: living +source: src/atlas/types.ts +generated: 2026-06-12 +--- + +# Atlas Zod refinement coverage + +This document enumerates every Zod refinement and transform currently in +`src/atlas/types.ts` (the foundational Atlas contract). For each, it +records whether the constraint is **JSON-Schema-expressible** (and therefore +survives `zod-to-json-schema` conversion at orchestrator-shell boot) or +whether it **requires a post-pass** Zod parse after JSON Schema validation +(because it is a runtime predicate / transform that `zod-to-json-schema` +silently drops). + +This file is paired with test `src/__tests__/atlas-refinement-coverage.test.ts` +(T9 per spec §7.9). The test asserts the refinement count in this doc matches +the refinement count counted in source — so if you add a new `.refine(...)` / +`.superRefine(...)` / `.transform(...)` to `src/atlas/types.ts`, you MUST +add a corresponding row here, otherwise T9 fails with a stale-doc message. + +## Refinement table + +| Refinement | Schema | JSON-Schema-expressible? | Post-pass note | +| ----------------------------------------------------------------------- | --------------------------------------------- | -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `subsystemHasNoDelimiter` (fragment) | `CandidateFragmentSchema` (line ~145) | No (runtime predicate over a string body) | Rejects when `subsystem` contains `:`, `⟦`, or `⟧`. JSON Schema cannot express a predicate over unicode delimiters as a portable `pattern`. 
Enforced by the CLI helper's post-pass `CandidateFragmentSchema.parse(input)` step (see spec §4.2.1, STEP 2). | +| `subsystemHasNoDelimiter` (finalized candidate) | `CandidateSchema` (line ~207) | No (runtime predicate) | Same predicate as the fragment row above, applied to the finalized Tier-3 `Candidate` after canonicalization. JSON Schema is not the validation surface for finalized rows — they are validated in TS by `CandidateSchema.parse(...)` — so this lives purely in Zod. | +| `episodic.needsReview === true` | `EpisodicCandidateFragmentSchema` (line ~166) | No (semantic invariant, not structural) | Rejects when `needsReview !== true`. Episodic leaves are "guilty until validated" — the per-family invariant cannot be expressed as a JSON Schema `const` on a `boolean` because the base `CandidateFragmentSchema` permits both values; only the episodic narrowing forbids `false`. Enforced as a SECOND parse via `EpisodicCandidateFragmentSchema` when `sourcetype === "episodic"` (spec §4.6). | +| `episodic.provenance.classification.provenance_class === "derived"` | `EpisodicCandidateFragmentSchema` (line ~173) | No (semantic invariant) | Rejects when `provenance_class !== "derived"`. Episodic leaves can never be `"primary"`. Enforced post-pass via the episodic-narrowed schema. | +| `episodic.provenance.classification.confidence === "low"` | `EpisodicCandidateFragmentSchema` (line ~177) | No (semantic invariant) | Rejects when `confidence !== "low"`. Episodic confidence is clamped to `"low"` by policy. Enforced post-pass via the episodic-narrowed schema. | +| `episodic.provenance.classification.validation_status === "unverified"` | `EpisodicCandidateFragmentSchema` (line ~181) | No (semantic invariant) | Rejects when `validation_status !== "unverified"`. Episodic claims are unverified by construction. Enforced post-pass via the episodic-narrowed schema. 
| +| `episodic sensitivity floor` (transform) | `EpisodicCandidateFragmentSchema` (line ~190) | No (`.transform` mutates the parsed value; not expressible in JSON Schema) | Coerces `sensitivity === "public"` upward to `"internal"`; `"internal"` / `"proprietary"` / `"secret"` are preserved verbatim. This is a "coerce up to floor" rewrite, NOT a "reject below floor" predicate, so even the JSON-Schema `enum` shape would not catch it (the input is allowed; the value just gets rewritten before persistence). Enforced post-pass via `EpisodicCandidateFragmentSchema.parse(...)`. | + +## Summary + +- Total refinements / transforms in `src/atlas/types.ts`: **7** +- JSON-Schema-expressible: **0** +- Post-pass required: **7** + +All seven entries are runtime predicates or transforms; none survive +`zod-to-json-schema` conversion. The CLI helper (`atlas-harvest +write-fragment --stdin`) therefore re-parses every fragment through Zod: +`CandidateFragmentSchema` for non-episodic fragments, or the narrower +`EpisodicCandidateFragmentSchema` when `sourcetype === "episodic"`, so the +fragment-level rows above are enforced. See spec §4.1.1, §4.2.1, and §4.6 for the full +orchestration-shell vs CLI-helper split. + +## Future-edit note + +If you add a `.refine(...)`, `.superRefine(...)`, `.transform(...)`, or +`.regex(...)` to `src/atlas/types.ts`, you must: + +1. Add a row to the table above describing the constraint, the host + schema, whether it is JSON-Schema-expressible, and where it is + enforced. +2. Update the **Summary** counts. +3. Re-run `npx vitest run src/__tests__/atlas-refinement-coverage.test.ts` + and confirm green. + +T9 fails fast on count drift so the silent-drop class of bug (a new +runtime predicate added to `types.ts` but never wired into the CLI +post-pass) is caught at test time, not at first failing leaf.
diff --git a/src/__tests__/atlas-defect-regression.test.ts b/src/__tests__/atlas-defect-regression.test.ts new file mode 100644 index 0000000..b476468 --- /dev/null +++ b/src/__tests__/atlas-defect-regression.test.ts @@ -0,0 +1,387 @@ +import { describe, test, expect } from "vitest"; +import { CandidateFragmentSchema } from "../atlas/types.js"; + +// ── T4 + T5 + T7 — Defect-regression corpus (spec §1.1 / §7.4 / §7.7) ───────── +// +// This file codifies, as parametric regression tests, every defect class +// observed in one full-monty run that the new schema-enforcement boundary +// (CandidateFragmentSchema as the single I/O contract) must reject. It +// REPLACES today's repair-shim's permissive acceptance: any defect that the +// shim used to silently coerce is now a loud Zod rejection. +// +// The corpus comes in three blocks: +// +// T4 — 14 alias names for `provenance.classification.knowledge_type` +// (`kind` / `category` / `discipline` / `topic` / `domain` / `area` / +// `type` / `facet` / `bucket` / `class` / `subject` / `theme` / +// `label` / `tag`). Each fixture is otherwise-valid but substitutes +// the alias key for `knowledge_type`. The Zod schema rejects because +// `knowledge_type` is required. +// +// T5 — 12 other defect rows from §1.1 (rows 2–8 and 10–14; row 9 is +// DROPPED per plan SLOT-5 N1, because `audience` has a default and a +// fragment that ONLY omits it parses successfully under the current +// schema — the original observation reflects an older/lossy intake +// path, not a current-schema rejection). +// +// T7 — Integration: a happy-path fragment parses, and then for EACH of +// the 26 defect fixtures above, swapping the defect into the happy +// path yields a rejection. This ties T4 + T5 into a single +// comprehensive regression assertion: the same base, the same +// per-defect mutator, the same rejection. +// +// Row 14 footnote: §1.1 row 14 is "extra/unknown top-level fields silently +// dropped." 
// Under the BASE (non-`.strict()`) CandidateFragmentSchema, extras
// are stripped during parse — the parse SUCCEEDS but the extras do not
// survive into the parsed object. That is a known asymmetry vs the spec's
// "rejects all rows 2–14" framing, and per spec NG1 we are not permitted to
// tighten the schema in this slot. The row-14 fixture therefore asserts the
// CURRENT contract (extras stripped, no extra-field leakage into the parsed
// candidate), so the test still guards against drift (a future change that
// causes extras to leak through would fail the assertion).

// ── Helpers ──────────────────────────────────────────────────────────────────

// Deep-clone a plain JSON-shaped object. The fixtures here are all
// JSON-safe (no Dates, no functions, no Maps), so structured cloning via
// JSON round-trip is sufficient and keeps each parametric case independent.
// The explicit `<T>` type parameter ties the result type to the argument
// type; without it `T` is an undeclared name.
const clone = <T>(o: T): T => JSON.parse(JSON.stringify(o)) as T;

// Base happy-path fragment. T7's first assertion is that THIS parses; every
// defect fixture below is produced by applying ONE mutator to a clone of
// this base, so any rejection unambiguously traces to that single mutation.
const baseHappyPath = () => ({
  sourcetype: "memory" as const,
  subsystem: "atlas-harvest",
  source_name: "spec-§1.1",
  title: "Schema-enforcement boundary catches every observed defect class",
  content:
    "Each defect row in §1.1 is a separate parse failure under CandidateFragmentSchema; the repair shim is no longer required.",
  provenance: {
    source: "atlas-leaf",
    classification: {
      sensitivity: "internal" as const,
      knowledge_type: "process" as const,
      audience: "all-staff",
      validation_status: "unverified" as const,
      confidence: "high" as const,
      provenance_class: "primary" as const,
      freshness: { as_of: "2026-06-12" },
    },
  },
  evidence: [],
  needsReview: false,
  validationTargets: [],
});

// Serialize all zod issues into one string so per-case regex assertions can
// match anywhere in the issue list (path OR message). Joining keeps the
// per-case `expect(...).toMatch(...)` line readable: the assertion fails
// with the FULL issue list pretty-printed, which makes drift diagnoses
// obvious.
//
// Each issue renders as `<dotted.path>: <message>`; an empty path renders as
// `(root)`. Issues are joined with " | ". Path segments are typed as
// `string | number`, the same shape the CLI's issue predicate reads.
const formatIssues = (
  issues: ReadonlyArray<{
    path: ReadonlyArray<string | number>;
    message: string;
  }>,
): string =>
  issues
    .map((i) => `${i.path.join(".") || "(root)"}: ${i.message}`)
    .join(" | ");

// ── T4 — 14 classification-key aliases ───────────────────────────────────────
//
// Each alias replaces the literal `knowledge_type` key in
// `provenance.classification` while keeping the same valid enum VALUE
// (`"process"`). The Zod object schema requires `knowledge_type`, so the
// parse fails with an issue rooted at
// `provenance.classification.knowledge_type` (required field missing). The
// presence of the alias key itself is silently ignored (z.object strips
// unknowns) — the rejection is driven by the required key being absent,
// which is the right signal: it names the canonical key the model needs
// to emit.

const KNOWLEDGE_TYPE_ALIASES = [
  "kind",
  "category",
  "discipline",
  "topic",
  "domain",
  "area",
  "type",
  "facet",
  "bucket",
  "class",
  "subject",
  "theme",
  "label",
  "tag",
] as const;

// Build a fragment that uses `alias` instead of `knowledge_type` in
// `provenance.classification`. Returns a plain object (not typed against
// CandidateFragment, because by construction it does NOT satisfy the
// inferred type).
const fragmentWithAlias = (alias: string): unknown => {
  const f = clone(baseHappyPath()) as Record<string, unknown> & {
    provenance: { classification: Record<string, unknown> };
  };
  const c = f.provenance.classification;
  // Move the value to the alias key and drop the canonical key.
  c[alias] = c.knowledge_type;
  delete c.knowledge_type;
  return f;
};

describe("T4: classification.knowledge_type key aliases (§1.1 row 1)", () => {
  test.each(KNOWLEDGE_TYPE_ALIASES.map((alias) => [alias] as const))(
    "rejects fragment whose classification uses alias %s instead of knowledge_type",
    (alias) => {
      const fixture = fragmentWithAlias(alias);
      const result = CandidateFragmentSchema.safeParse(fixture);
      expect(result.success).toBe(false);
      if (result.success) return; // type guard
      const formatted = formatIssues(result.error.issues);
      // The rejection must be rooted at the canonical key the model SHOULD
      // have emitted, so the operator can read the error and fix the alias.
      expect(formatted).toMatch(/provenance\.classification\.knowledge_type/);
    },
  );
});

// ── T5 — 12 other defect rows (rows 2–8, 10–14; row 9 dropped) ───────────────
//
// Each row is a mutator that turns the happy-path base into a single-defect
// fixture, plus a regex the formatted-issues string must match so the
// rejection's path/message names the offending field.

interface DefectCase {
  row: number;
  desc: string;
  // Mutate a CLONE of the happy-path base in place; the caller passes a
  // fresh clone for each invocation.
  mutate: (f: ReturnType<typeof baseHappyPath>) => unknown;
  // The formatted-issues string MUST match this regex. Keep the regex tight
  // enough to name the right field/path, loose enough to survive minor
  // zod-message wording drift across patch releases.
  expect: RegExp;
}

const DEFECT_CASES: DefectCase[] = [
  {
    row: 2,
    desc: "classification lifted to top-level (top-level `sensitivity`)",
    mutate: (f) => {
      const lifted: Record<string, unknown> = { ...(f as object) };
      lifted.sensitivity = f.provenance.classification.sensitivity;
      // Drop the nested copy so the inner Sensitivity enum field is missing.
      delete (lifted.provenance as { classification: Record<string, unknown> })
        .classification.sensitivity;
      return lifted;
    },
    expect: /provenance\.classification\.sensitivity/,
  },
  {
    row: 3,
    desc: "evidence as string (path) instead of array",
    mutate: (f) => {
      (f as unknown as { evidence: unknown }).evidence = "src/foo.ts";
      return f;
    },
    expect: /evidence/,
  },
  {
    row: 4,
    desc: "evidence as plain object instead of array",
    mutate: (f) => {
      (f as unknown as { evidence: unknown }).evidence = {
        kind: "changed_file",
        path: "src/foo.ts",
      };
      return f;
    },
    expect: /evidence/,
  },
  {
    row: 5,
    desc: "evidence items missing `kind` discriminator",
    mutate: (f) => {
      (f as unknown as { evidence: unknown }).evidence = [
        { path: "src/foo.ts" },
      ];
      return f;
    },
    expect: /evidence\.0(\..*)?/,
  },
  {
    row: 6,
    desc: "provenance flattened — top-level `source`/`url` instead of nested",
    mutate: (f) => {
      const flat: Record<string, unknown> = { ...(f as object) };
      flat.source = f.provenance.source;
      flat.url = "https://example.invalid/issue/42";
      delete (flat as { provenance?: unknown }).provenance;
      return flat;
    },
    expect: /provenance/,
  },
  {
    row: 7,
    desc: "provenance.classification lifted to top-level",
    mutate: (f) => {
      const lifted: Record<string, unknown> = { ...(f as object) };
      lifted.classification = f.provenance.classification;
      delete (lifted.provenance as { classification?: unknown }).classification;
      return lifted;
    },
    expect: /provenance\.classification/,
  },
  {
    row: 8,
    desc: "freshness as string instead of `{ as_of }` object",
    mutate: (f) => {
      (
        f.provenance.classification as unknown as { freshness: unknown }
      ).freshness = "2026-06-09";
      return f;
    },
    expect: /provenance\.classification\.freshness/,
  },
  // Row 9 dropped — see file-header comment.
  {
    row: 10,
    desc: "validationTargets as string instead of array",
    mutate: (f) => {
      (f as unknown as { validationTargets: unknown }).validationTargets =
        "src/foo.ts";
      return f;
    },
    expect: /validationTargets/,
  },
  {
    row: 11,
    desc: "needsReview as string instead of boolean",
    mutate: (f) => {
      (f as unknown as { needsReview: unknown }).needsReview = "true";
      return f;
    },
    expect: /needsReview/,
  },
  {
    row: 12,
    desc: "subsystem containing canonical-key delimiter `:`",
    mutate: (f) => {
      f.subsystem = "foo:bar";
      return f;
    },
    expect: /subsystem/,
  },
  {
    row: 13,
    desc: "missing top-level `sourcetype`",
    mutate: (f) => {
      delete (f as unknown as { sourcetype?: unknown }).sourcetype;
      return f;
    },
    expect: /sourcetype/,
  },
  {
    row: 14,
    desc: "extra/unknown top-level fields (e.g. `summary`, `tags`) — stripped, not rejected (base schema is non-strict; see file header)",
    // Row 14 is the one defect class where the BASE schema does not REJECT —
    // z.object() strips unknown keys. We instead assert that the extras do
    // not LEAK into the parsed candidate (the contract the rest of the
    // pipeline depends on). NG1 forbids tightening the schema to `.strict()`
    // in this slot.
    mutate: (f) => {
      const withExtras = { ...(f as object), summary: "drop me", tags: ["x"] };
      return withExtras;
    },
    expect: /__row14_marker_unused__/, // never matched; row 14 takes the alternate assertion path below
  },
];

describe("T5: other defect rows (§1.1 rows 2–8, 10–14)", () => {
  test.each(DEFECT_CASES.map((c) => [c.row, c.desc, c] as const))(
    "row %d — %s",
    (row, _desc, c) => {
      const fixture = c.mutate(clone(baseHappyPath()));
      const result = CandidateFragmentSchema.safeParse(fixture);

      if (row === 14) {
        // Row 14 — assert the documented current behavior: parse SUCCEEDS,
        // extras stripped, canonical fields all present.
        expect(result.success).toBe(true);
        if (!result.success) return;
        const parsedKeys = Object.keys(result.data);
        expect(parsedKeys).not.toContain("summary");
        expect(parsedKeys).not.toContain("tags");
        // The canonical fields survived the parse.
        expect(parsedKeys).toEqual(
          expect.arrayContaining([
            "sourcetype",
            "subsystem",
            "source_name",
            "title",
            "content",
            "provenance",
            "evidence",
            "needsReview",
            "validationTargets",
          ]),
        );
        return;
      }

      expect(result.success).toBe(false);
      if (result.success) return; // type guard
      const formatted = formatIssues(result.error.issues);
      expect(formatted).toMatch(c.expect);
    },
  );
});

// ── T7 — Integration: happy-path passes; every defect swap rejects ───────────
//
// T7 ties T4 + T5 together: ONE base, the SAME per-defect mutators, parse
// runs end-to-end. The intent is to prove the happy path is wired correctly
// AND that no defect leaks through under the same surface the production
// helper uses. If a future schema tweak accidentally re-admits a defect,
// this block fails at the integration layer in addition to the focused
// T4/T5 case.
+ +describe("T7: integration — happy path + every defect swap", () => { + test("happy-path fragment parses successfully", () => { + const result = CandidateFragmentSchema.safeParse(baseHappyPath()); + expect(result.success).toBe(true); + if (!result.success) return; + // Sanity: the parsed candidate's enum-typed fields survived. + expect(result.data.sourcetype).toBe("memory"); + expect(result.data.provenance.classification.knowledge_type).toBe( + "process", + ); + }); + + test.each(KNOWLEDGE_TYPE_ALIASES.map((a) => [a] as const))( + "alias swap (%s) rejected at integration layer", + (alias) => { + const result = CandidateFragmentSchema.safeParse( + fragmentWithAlias(alias), + ); + expect(result.success).toBe(false); + }, + ); + + test.each(DEFECT_CASES.map((c) => [c.row, c.desc, c] as const))( + "defect-row swap (row %d — %s) handled at integration layer", + (row, _desc, c) => { + const fixture = c.mutate(clone(baseHappyPath())); + const result = CandidateFragmentSchema.safeParse(fixture); + // Mirror T5: row 14 PASSES with extras stripped; all others REJECT. + if (row === 14) { + expect(result.success).toBe(true); + if (!result.success) return; + expect(Object.keys(result.data)).not.toContain("summary"); + return; + } + expect(result.success).toBe(false); + }, + ); +}); diff --git a/src/__tests__/atlas-dual-run.test.ts b/src/__tests__/atlas-dual-run.test.ts new file mode 100644 index 0000000..f191517 --- /dev/null +++ b/src/__tests__/atlas-dual-run.test.ts @@ -0,0 +1,424 @@ +import { describe, it, expect } from "vitest"; +import { runDualRun } from "../atlas/dual-run.js"; + +// T10 — dual-run shadow gate scaffold (spec §6.2, §7.6). +// +// Exercises the three precondition branches of `runDualRun`: +// (a) seed-present + match ........... byte-equality after canonicalize +// (a) seed-present + diverge ......... canonicalize differs → diagnose field +// (b) no-seed + relaxed-match ........ same shape + enums + text ≥ 0.95 +// (b) no-seed + diverge .............. 
enum field differs +// (c) neither-available + gated ...... refuse to advance + +describe("runDualRun — §7.6 T10 shadow-gate scaffold", () => { + it("seed-present: identical runs canonicalize equal → match", () => { + // Key-permuted but value-identical fragments. canonicalizeFragment sorts + // keys + normalizes whitespace, so the stringified canonical output is + // byte-equal — strict-comparator match. + const runA = { + title: "Atlas schema enforcement", + content: "First line.\nSecond line.", + sensitivity: "internal", + confidence: "high", + }; + const runB = { + // Same fields, different insertion order, equivalent whitespace. + content: "First line. Second line.", + sensitivity: "internal", + title: "Atlas schema enforcement", + confidence: "high", + }; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: true, + relaxedComparatorAvailable: false, + }); + + expect(verdict.result).toBe("match"); + expect(verdict.reason).toMatch(/seed-present/); + }); + + it("seed-present: runs differing in title → diverge, reason names title", () => { + const runA = { + title: "Atlas schema enforcement", + content: "Same content body.", + sensitivity: "internal", + confidence: "high", + }; + const runB = { + title: "Atlas schema enforcement — revised", + content: "Same content body.", + sensitivity: "internal", + confidence: "high", + }; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: true, + relaxedComparatorAvailable: false, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("title"); + }); + + // M-1 + M-5 regression coverage — the no-seed-relaxed branch must read the + // five classification enums at their REAL nested path + // (provenance.classification.) AND must also enforce sourcetype + // (top-level) and per-item evidence[].kind. 
The "both-missing" case for any + // covered enum is a structural divergence (per spec §7.6 the relaxed + // comparator's purpose is to catch enum drift; the classification record + // itself is a structural invariant). + + // Minimum-valid CandidateFragment shape — matches CandidateFragmentObject + // in src/atlas/types.ts (validated by CandidateFragmentSchema). Helper keeps + // the relaxed-branch tests below readable; tests that need to perturb a + // single field call baseFragment() on both sides and mutate one side. + const baseFragment = () => ({ + sourcetype: "github-pr" as const, + subsystem: "atlas", + source_name: "test-source", + title: "Atlas pipeline overview", + content: "Pipeline that canonicalizes fragments under provenance.", + provenance: { + source: "test", + classification: { + sensitivity: "internal" as const, + knowledge_type: "architecture" as const, + audience: "all-staff", + validation_status: "showcase-verified" as const, + confidence: "high" as const, + provenance_class: "primary" as const, + freshness: { as_of: "2026-06-12" }, + }, + }, + evidence: [ + { kind: "changed_file" as const, path: "src/atlas/dual-run.ts" }, + ], + needsReview: false, + validationTargets: [], + }); + + it("no-seed: same enums + highly-similar text → relaxed-match", () => { + // Same shape (identical key set), nested enums byte-equal, and the text + // fields are byte-identical (Jaccard 1.0) — well above the 0.95 threshold.
+ const sharedTokens = + "the atlas pipeline canonicalizes fragments and ranks them by confidence and recency under provenance class primary"; + const runA = baseFragment(); + runA.content = sharedTokens + " plus an extra clarifying token here"; + const runB = baseFragment(); + runB.content = sharedTokens + " plus an extra clarifying token here"; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("relaxed-match"); + }); + + it("no-seed: enum field differs (sensitivity) → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + runB.provenance.classification.sensitivity = + "public" as typeof runB.provenance.classification.sensitivity; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("sensitivity"); + }); + + it("M-1 nesting: mismatched provenance.classification.knowledge_type → diverge", () => { + // Independent corroboration of the nested-path read for a SECOND enum + // (not sensitivity, exercised above). Catches a future regression that + // hardcodes the path for sensitivity but mis-handles the other four. 
+ const runA = baseFragment(); + const runB = baseFragment(); + runB.provenance.classification.knowledge_type = + "ownership" as typeof runB.provenance.classification.knowledge_type; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("knowledge_type"); + }); + + it("M-1 sourcetype: top-level sourcetype mismatch → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + runB.sourcetype = "notion-doc" as typeof runB.sourcetype; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("sourcetype"); + }); + + it("T-R5-1 both-missing sourcetype: neither side has sourcetype → diverge", () => { + // CandidateFragmentObject declares `sourcetype` as a REQUIRED enum (no + // .optional(), no .default()). Per the M-5/T-R3-1/T-R3-2 precedent, the + // relaxed comparator must NOT silently pass when both sides are missing a + // required structural field. JSON.stringify(undefined) === undefined on + // both sides would compare-equal and silent-pass without an explicit + // both-missing guard. + const runA = baseFragment(); + const runB = baseFragment(); + delete (runA as any).sourcetype; + delete (runB as any).sourcetype; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("sourcetype"); + }); + + it("M-1 evidence[].kind: per-item evidence kind mismatch → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + // Swap the single evidence item's discriminant on runB. 
+ runB.evidence = [{ kind: "linked_issue" as const, path: "x" } as any]; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toMatch(/evidence/); + }); + + it("M-5 both-missing: neither side has provenance.classification.sensitivity → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + // Drop sensitivity on both sides. The classification record is a + // structural invariant per spec §7.6; both-missing must NOT silently pass. + delete (runA.provenance.classification as any).sensitivity; + delete (runB.provenance.classification as any).sensitivity; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("sensitivity"); + }); + + // T-R3-1: extend M-5 "both-missing → diverge" precedent to the required + // structural text fields (title, content). CandidateFragmentObject declares + // title: z.string() and content: z.string() — both REQUIRED, no .default(), + // no .optional(). Two fragments both lacking `title` (or both lacking + // `content`) are both malformed; the relaxed comparator must NOT collapse + // them to an empty-string Jaccard 1.0 silent pass. 
+ + it("T-R3-1 both-missing title: neither side has title → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + delete (runA as any).title; + delete (runB as any).title; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("title"); + }); + + it("T-R3-1 both-non-string title: neither side has string title → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + (runA as any).title = 42; + (runB as any).title = null; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("title"); + }); + + it("T-R3-1 both-missing content: neither side has content → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + delete (runA as any).content; + delete (runB as any).content; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("content"); + }); + + // T-R3-2: extend M-5 "both-missing → diverge" precedent to the evidence + // array. The schema declares `evidence: z.array(...).default([])` so AFTER + // parse evidence is always an array — but the comparator receives untyped + // `object` from upstream and is the structural pin against malformation + // bypassing the parser. Both sides missing the evidence field entirely + // means both fragments are malformed in the same way → diverge, not silent + // empty-array match. 
+ + it("T-R3-2 both-missing evidence: neither side has evidence → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + delete (runA as any).evidence; + delete (runB as any).evidence; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toMatch(/evidence/); + }); + + // T-R4-1: extend the M-5 / T-R3-2 "both-missing → diverge" precedent to the + // ASYMMETRIC mixed-shape variant. When ONE side's `evidence` is a non-array + // value (undefined / string / scalar / object) and the OTHER side is a + // well-formed array (including the empty array `[]`), the prior `?? []` + // fallback collapsed both sides to length-0 and the per-index loop trivially + // matched — a silent relaxed-match on a structurally divergent pair. Per + // spec §7.6, structurally different evidence shapes must diverge. + + it("T-R4-1 asymmetric evidence: undefined vs [] → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + // Keep the `evidence` key present on both sides (so the top-level key-set + // check does not fire first); set runA to a non-array value to exercise + // the XOR shape-mismatch branch directly. 
+ (runA as any).evidence = undefined; // non-array (undefined) + (runB as any).evidence = []; // valid empty array + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toMatch(/evidence/); + }); + + it("T-R4-1 asymmetric evidence: non-array string vs [] → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + (runA as any).evidence = "not-an-array"; + (runB as any).evidence = []; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toMatch(/evidence/); + }); + + // T-R4-2: extend the T-R3-1 "both-missing/non-string → diverge" precedent + // to the ASYMMETRIC mixed-shape variant for text fields. When ONE side has + // a valid empty string `""` and the OTHER side is non-string (undefined / + // number / null / object), the prior `?? ""` fallback collapsed both to + // `""`, Jaccard("", "") = 1.0, and the gate silently relaxed-matched on a + // structurally divergent pair. Per spec §7.6 + the schema's required + // `z.string()` declaration, shape-mismatch must diverge. + + it("T-R4-2 asymmetric title: empty string vs undefined → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + // Keep the `title` key present on both sides (so the top-level key-set + // check does not fire first); the non-string side exercises the text-XOR + // shape-mismatch branch directly. 
+ (runA as any).title = ""; // valid empty string + (runB as any).title = undefined; // non-string (undefined value) + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("title"); + }); + + it("T-R4-2 asymmetric content: empty string vs non-string → diverge", () => { + const runA = baseFragment(); + const runB = baseFragment(); + (runA as any).content = ""; // valid empty string + (runB as any).content = 42; // non-string number + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: true, + }); + + expect(verdict.result).toBe("diverge"); + expect(verdict.reason).toContain("content"); + }); + + it("neither seed nor relaxed comparator available → gated", () => { + const runA = { title: "x" }; + const runB = { title: "x" }; + + const verdict = runDualRun({ + runA, + runB, + seedAvailable: false, + relaxedComparatorAvailable: false, + }); + + expect(verdict.result).toBe("gated"); + expect(verdict.reason).toMatch(/neither/i); + }); +}); diff --git a/src/__tests__/atlas-refinement-coverage.test.ts b/src/__tests__/atlas-refinement-coverage.test.ts new file mode 100644 index 0000000..e84792b --- /dev/null +++ b/src/__tests__/atlas-refinement-coverage.test.ts @@ -0,0 +1,138 @@ +import { describe, it, expect } from "vitest"; +import { readFileSync } from "node:fs"; +import { resolve } from "node:path"; + +// ── T9 — Refinement-coverage stale-doc guard (spec §4.1.1 / §7.9) ───────────── +// +// `zod-to-json-schema` silently drops every `.refine(...)`, `.superRefine(...)`, +// `.transform(...)`, and `.regex(...)` it cannot translate. 
The orchestration +// shell hands the JSON-Schema'd document to the structured-output call, so +// every Zod runtime predicate that does NOT round-trip into JSON Schema MUST +// be wired into a post-pass Zod parse in the `atlas harvest write-fragment +// --stdin` CLI helper (spec §4.2.1 STEP 2 + §4.6). Otherwise the predicate is +// "silently lost" and malformed fragments land on disk. +// +// This test future-guards against the silent-drop class of bug: it walks +// `src/atlas/types.ts`, counts every `.refine(`, `.superRefine(`, and +// `.transform(` token IN CODE (comments stripped), then counts the rows in +// the refinement-coverage table in `docs/atlas/refinement-coverage.md`, and +// asserts the two counts agree. If a contributor adds a new refinement to +// `types.ts` without adding a corresponding doc row (and therefore without +// thinking about where to wire it into the post-pass), the test fails with +// a stale-doc message that names the drift. + +const REPO_ROOT = resolve(__dirname, "..", ".."); +const TYPES_PATH = resolve(REPO_ROOT, "src", "atlas", "types.ts"); +const DOC_PATH = resolve(REPO_ROOT, "docs", "atlas", "refinement-coverage.md"); + +// Strip `/* ... */` block comments and `// ...` line comments from a TS source +// string. We strip block comments first (they may span multiple lines and +// could contain `//` inside them); then we strip line comments. This is not a +// full TypeScript tokenizer, but it is sufficient to keep the refinement +// counter from picking up the in-source future-edit note that mentions +// `.refine(...)` inside a `//` comment on line ~163 of `types.ts`. +function stripComments(src: string): string { + // Remove /* ... */ (non-greedy, multiline). + const noBlock = src.replace(/\/\*[\s\S]*?\*\//g, ""); + // Remove // ... to end of line. + const noLine = noBlock.replace(/\/\/[^\n]*/g, ""); + return noLine; +} + +// Count non-overlapping occurrences of a regex needle in `body`. 
+function countOccurrences(body: string, needle: RegExp): number { + const matches = body.match(needle); + return matches === null ? 0 : matches.length; +} + +// Count rows in the FIRST GitHub-flavored markdown table in `doc` that has the +// expected refinement-coverage header (`| Refinement | Schema |`). The row +// count EXCLUDES the header row and the `|---|---|...|` separator row. +function countTableRows(doc: string): number { + const lines = doc.split("\n"); + let inTable = false; + let sawSeparator = false; + let rowCount = 0; + for (const line of lines) { + const trimmed = line.trim(); + if (!inTable) { + // The header row we're targeting: `| Refinement | Schema | JSON-Schema-expressible? | Post-pass note |` + if (/^\|\s*Refinement\s*\|\s*Schema\s*\|/i.test(trimmed)) { + inTable = true; + } + continue; + } + // Inside the table. + if (!sawSeparator) { + // The separator line: `|---|---|---|---|` + if (/^\|\s*-+\s*(\|\s*-+\s*)+\|?$/.test(trimmed)) { + sawSeparator = true; + } + continue; + } + // Data row OR end of table. A data row starts with `|`. A blank line or a + // non-`|` line ends the table. + if (trimmed.startsWith("|")) { + rowCount += 1; + continue; + } + if (trimmed === "") { + break; + } + // Some other content — treat as end of table. 
+ break; + } + return rowCount; +} + +describe("atlas refinement coverage (T9 — stale-doc guard)", () => { + it("doc table row count matches source refinement count", () => { + const typesSrc = readFileSync(TYPES_PATH, "utf8"); + const docSrc = readFileSync(DOC_PATH, "utf8"); + + const codeOnly = stripComments(typesSrc); + const refineCount = countOccurrences(codeOnly, /\.refine\(/g); + const superRefineCount = countOccurrences(codeOnly, /\.superRefine\(/g); + const transformCount = countOccurrences(codeOnly, /\.transform\(/g); + const sourceCount = refineCount + superRefineCount + transformCount; + + const tableRows = countTableRows(docSrc); + + expect( + tableRows, + `refinement-coverage.md is stale — ${sourceCount} source refinements ` + + `(refine=${refineCount}, superRefine=${superRefineCount}, transform=${transformCount}) ` + + `vs ${tableRows} table rows. Update docs/atlas/refinement-coverage.md.`, + ).toBe(sourceCount); + }); + + it("doc Summary block reports a Total count that matches source", () => { + // A second, weaker assertion: the human-readable Summary block in the + // doc lists the total refinement count. If a contributor updates the + // table but forgets to update the summary numerals, that's also drift. + const typesSrc = readFileSync(TYPES_PATH, "utf8"); + const docSrc = readFileSync(DOC_PATH, "utf8"); + + const codeOnly = stripComments(typesSrc); + const sourceCount = + countOccurrences(codeOnly, /\.refine\(/g) + + countOccurrences(codeOnly, /\.superRefine\(/g) + + countOccurrences(codeOnly, /\.transform\(/g); + + // Match `Total refinements / transforms in \`src/atlas/types.ts\`: **N**` + const totalMatch = docSrc.match( + /Total refinements[^\n]*\*\*\s*(\d+)\s*\*\*/i, + ); + expect( + totalMatch, + "refinement-coverage.md is missing a `Total refinements ... 
**N**` summary line.", + ).not.toBeNull(); + const docTotal = Number(totalMatch![1]); + expect( + docTotal, + `refinement-coverage.md summary is stale — source has ${sourceCount} ` + + `refinements, summary says ${docTotal}. ` + + `Update docs/atlas/refinement-coverage.md.`, + ).toBe(sourceCount); + }); +}); diff --git a/src/atlas/dual-run.ts b/src/atlas/dual-run.ts new file mode 100644 index 0000000..72fa0cc --- /dev/null +++ b/src/atlas/dual-run.ts @@ -0,0 +1,315 @@ +// Phase-2 dual-run shadow gate (spec §6.2, §7.6 / T10). +// +// runDualRun compares two structured-output draws (runA, runB) for the SAME +// fragment-shaped target and produces a verdict the harness uses to decide +// whether Phase 2 may advance. The comparator has three precondition branches: +// +// (a) seed-present (deterministic control): canonicalize both runs via +// `canonicalizeFragment` and require byte-equality of the resulting +// JSON.stringify. If they match → "match". Else → "diverge", with a +// reason naming the FIRST diverging top-level field. +// +// (b) no-seed but a relaxed comparator is available: compare structurally +// — same top-level shape (same key set), enum fields byte-identical +// at their real paths (the five classification enums nested under +// `provenance.classification.*` — sensitivity, knowledge_type, +// validation_status, confidence, provenance_class — plus top-level +// `sourcetype` and per-item `evidence[].kind`), and free-text fields +// (title, content) with similarity ≥ 0.95. Pass → "relaxed-match"; +// else → "diverge". +// Similarity uses a simple word-set Jaccard (|A∩B| / |A∪B|) — chosen +// over character-bigram cosine to avoid pulling in an extra dependency; +// Jaccard is robust enough for the gate threshold and trivially +// reproducible. See SIMILARITY_THRESHOLD below. +// +// (c) neither available: "gated" — Phase 2 cannot advance. +// +// Verdicts are diagnostic, not destructive — the gate refuses to advance +// rather than dropping data. 
+ +import { canonicalizeFragment } from "./canonicalize.js"; + +// Classification enum fields whose values must be byte-identical in the +// no-seed relaxed branch. CandidateFragmentObject (src/atlas/types.ts) puts +// these FIVE under `provenance.classification.` — they are NOT top- +// level on the fragment. Reading them at the wrong nesting level silently +// passes every check on a real fragment; M-1 fixed that. +const CLASSIFICATION_ENUM_FIELDS = [ + "sensitivity", + "knowledge_type", + "validation_status", + "confidence", + "provenance_class", +] as const; + +// Free-text fields compared by similarity in the relaxed branch. +const TEXT_FIELDS = ["title", "content"] as const; + +// Jaccard similarity threshold for the relaxed comparator — same threshold +// the spec calls for under the cosine framing; Jaccard is the equivalent +// set-overlap measure for our short, mostly-token-distinct strings. +const SIMILARITY_THRESHOLD = 0.95; + +export type DualRunResult = "match" | "relaxed-match" | "diverge" | "gated"; + +export interface DualRunVerdict { + result: DualRunResult; + reason: string; +} + +export interface DualRunOptions { + runA: object; + runB: object; + seedAvailable: boolean; + relaxedComparatorAvailable: boolean; +} + +// Word-set Jaccard similarity in [0, 1]. Two empty strings are defined as +// identical (similarity 1) — they are byte-equal and the relaxed comparator +// has nothing to disagree about. 
+function jaccardSimilarity(a: string, b: string): number { + const tokens = (s: string): Set => + new Set( + s + .toLowerCase() + .split(/\s+/) + .filter((t) => t.length > 0), + ); + const setA = tokens(a); + const setB = tokens(b); + if (setA.size === 0 && setB.size === 0) return 1; + let intersection = 0; + for (const t of setA) if (setB.has(t)) intersection += 1; + const union = setA.size + setB.size - intersection; + if (union === 0) return 1; + return intersection / union; +} + +// Find the first top-level field whose canonicalized JSON differs between +// runA and runB. Returns the field name, or null if the two are byte-equal +// at every top-level key (in which case they should also stringify equal). +function firstDivergingField( + a: Record, + b: Record, +): string | null { + const keys = new Set([...Object.keys(a), ...Object.keys(b)]); + // Sort for determinism — we want the SAME "first" field to be reported + // regardless of object key-insertion order on either side. + const sortedKeys = Array.from(keys).sort(); + for (const k of sortedKeys) { + if (JSON.stringify(a[k]) !== JSON.stringify(b[k])) return k; + } + return null; +} + +export function runDualRun(opts: DualRunOptions): DualRunVerdict { + const { runA, runB, seedAvailable, relaxedComparatorAvailable } = opts; + + // Branch (a): seed-present — strict byte-equality after canonicalize. + if (seedAvailable) { + const canonA = canonicalizeFragment(runA) as Record; + const canonB = canonicalizeFragment(runB) as Record; + if (JSON.stringify(canonA) === JSON.stringify(canonB)) { + return { + result: "match", + reason: "seed-present byte-equality after canonicalize", + }; + } + const field = firstDivergingField(canonA, canonB); + return { + result: "diverge", + reason: field + ? `seed-present canonicalized runs diverge at field "${field}"` + : "seed-present canonicalized runs diverge", + }; + } + + // Branch (b): no-seed but relaxed comparator available. 
+ if (relaxedComparatorAvailable) { + const a = runA as Record; + const b = runB as Record; + + // Shape compat: same top-level key set. Schema validation happened + // upstream, so we only need to confirm the two runs are comparing the + // same field surface. + const keysA = new Set(Object.keys(a)); + const keysB = new Set(Object.keys(b)); + if (keysA.size !== keysB.size) { + return { + result: "diverge", + reason: `no-seed relaxed: top-level key sets differ in size (${keysA.size} vs ${keysB.size})`, + }; + } + for (const k of keysA) { + if (!keysB.has(k)) { + return { + result: "diverge", + reason: `no-seed relaxed: key "${k}" missing on runB`, + }; + } + } + + // Classification enum fields: byte-identical, read at the REAL nested + // path `provenance.classification.`. A future regression that + // moved them top-level (or renamed `classification`) would re-trip the + // both-missing rule below and surface as `diverge`, not a silent pass. + const classA = + ((a.provenance as Record | undefined)?.classification as + | Record + | undefined) ?? {}; + const classB = + ((b.provenance as Record | undefined)?.classification as + | Record + | undefined) ?? {}; + for (const field of CLASSIFICATION_ENUM_FIELDS) { + const hasA = field in classA; + const hasB = field in classB; + // M-5: schema requires every classification enum present on every + // valid fragment; both-missing means at least one side is malformed, + // which is a structural divergence from the contract. + if (!hasA && !hasB) { + return { + result: "diverge", + reason: `no-seed relaxed: classification enum "${field}" missing on both sides`, + }; + } + if (JSON.stringify(classA[field]) !== JSON.stringify(classB[field])) { + return { + result: "diverge", + reason: `no-seed relaxed: classification enum "${field}" differs`, + }; + } + } + + // Top-level `sourcetype` enum: also covered by spec §7.6. 
Per the M-5 / + // T-R3-1 / T-R3-2 "both-missing → diverge" precedent, `sourcetype` is a + // REQUIRED structural enum on CandidateFragmentObject (no .optional(), no + // .default()). Without an explicit both-missing guard, the JSON.stringify + // compare below collapses to `undefined === undefined` and silent-passes. + const hasSourcetypeA = "sourcetype" in a; + const hasSourcetypeB = "sourcetype" in b; + if (!hasSourcetypeA && !hasSourcetypeB) { + return { + result: "diverge", + reason: `no-seed relaxed: enum field "sourcetype" missing on both sides`, + }; + } + if (JSON.stringify(a.sourcetype) !== JSON.stringify(b.sourcetype)) { + return { + result: "diverge", + reason: `no-seed relaxed: enum field "sourcetype" differs`, + }; + } + + // Per-item `evidence[].kind` enum: both sides must have the same number + // of evidence items AND the same `kind` discriminant at each index. A + // length or kind mismatch is a structural enum divergence; positional + // alignment matches the per-index canonicalize ordering. + // + // T-R3-2: extension of the M-5 "both-missing → diverge" precedent. The + // schema declares `evidence: z.array(...).default([])` — i.e. AFTER parse + // it is always an array. The comparator receives `object` (untyped) and + // serves as the structural pin against malformation that bypasses the + // parser. If both sides are missing/non-array, the prior `?? []` fallback + // would collapse both to length-0 and silently pass — that is the silent + // pass on a malformed shape M-5 codified against. Diverge instead. + const evAArray = Array.isArray(a.evidence); + const evBArray = Array.isArray(b.evidence); + // T-R4-1: ASYMMETRIC mixed-shape XOR. When one side is an array and the + // other is not, the prior `?? []` fallback collapsed the non-array side + // to length-0 and silently relaxed-matched against a well-formed empty + // array on the other side. 
Per spec §7.6, structurally different + // evidence shapes must diverge — check XOR BEFORE the both-missing + // branch so the asymmetric class is closed. + if (evAArray !== evBArray) { + return { + result: "diverge", + reason: + "no-seed relaxed: evidence shape mismatch (one side is not an array)", + }; + } + if (!evAArray && !evBArray) { + return { + result: "diverge", + reason: "no-seed relaxed: evidence array missing on both sides", + }; + } + // Both arrays at this point — proceed with length + per-index check. + const evA = a.evidence as unknown[]; + const evB = b.evidence as unknown[]; + if (evA.length !== evB.length) { + return { + result: "diverge", + reason: `no-seed relaxed: evidence array length differs (${evA.length} vs ${evB.length})`, + }; + } + for (let i = 0; i < evA.length; i += 1) { + const kA = (evA[i] as Record | undefined)?.kind; + const kB = (evB[i] as Record | undefined)?.kind; + if (JSON.stringify(kA) !== JSON.stringify(kB)) { + return { + result: "diverge", + reason: `no-seed relaxed: evidence[${i}].kind differs`, + }; + } + } + + // Free-text fields: Jaccard similarity ≥ threshold. + // + // T-R3-1: extension of the M-5 "both-missing → diverge" precedent. + // CandidateFragmentObject declares `title: z.string()` and + // `content: z.string()` — both REQUIRED (no `.default()`, no + // `.optional()`). When BOTH sides have a missing/non-string value, the + // prior empty-string fallback would produce two empty token sets, the + // jaccard function returns 1.0 for two empty strings, and the relaxed + // branch silently matched on a structurally-malformed pair. Pre-check + // for both-missing on each required text field and diverge BEFORE + // hitting the similarity fallback. + for (const field of TEXT_FIELDS) { + const aIsString = typeof a[field] === "string"; + const bIsString = typeof b[field] === "string"; + // T-R4-2: ASYMMETRIC mixed-shape XOR. 
When one side has a valid + // (possibly empty) string and the other side is non-string, the prior + // `?? ""` fallback collapsed the non-string side to `""` and + // Jaccard("", "") = 1.0 silently relaxed-matched a structurally + // divergent pair. Per spec §7.6 + the schema's required `z.string()` + // declaration, shape-mismatch must diverge — check XOR BEFORE the + // both-missing branch so the asymmetric class is closed. + if (aIsString !== bIsString) { + return { + result: "diverge", + reason: `no-seed relaxed: text field "${field}" shape mismatch (one side is not a string)`, + }; + } + if (!aIsString && !bIsString) { + return { + result: "diverge", + reason: `no-seed relaxed: text field "${field}" missing on both sides`, + }; + } + // Both strings at this point — proceed with Jaccard similarity. + const ta = a[field] as string; + const tb = b[field] as string; + const sim = jaccardSimilarity(ta, tb); + if (sim < SIMILARITY_THRESHOLD) { + return { + result: "diverge", + reason: `no-seed relaxed: text field "${field}" similarity ${sim.toFixed(3)} < ${SIMILARITY_THRESHOLD}`, + }; + } + } + + return { + result: "relaxed-match", + reason: + "no-seed relaxed: shape + enums equal, text similarity above threshold", + }; + } + + // Branch (c): neither precondition met — gate refuses to advance. 
+ return { + result: "gated", + reason: + "neither seed control nor relaxed comparator is available; Phase 2 cannot advance", + }; +} From 9c5bee90b8d64b85b57c32dd938b21208554beda Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 12 Jun 2026 11:36:52 -0700 Subject: [PATCH 6/6] Document Phase-0 schema-enforced write path and fragment on-disk contract --- runs/fragments/README.md | 64 ++++++++ scripts/atlas-harvest/blitz-manifest.md | 2 + scripts/atlas-harvest/leaf-prompt.md | 195 +++++++++++++++++++----- 3 files changed, 226 insertions(+), 35 deletions(-) create mode 100644 runs/fragments/README.md diff --git a/runs/fragments/README.md b/runs/fragments/README.md new file mode 100644 index 0000000..39746e2 --- /dev/null +++ b/runs/fragments/README.md @@ -0,0 +1,64 @@ +# Atlas Harvest — fragment on-disk contract + +Fragments under `//fragments/.json` are the canonical +durable artifact of a Tier-1 leaf-fleet run. They are the seam between the +agent-orchestration half (the leaf fleet) and the deterministic in-process +half (`atlas harvest run` and downstream tiers). + +## On-disk format + +One JSON object per file, pretty-printed, validated against +`CandidateFragmentSchema` (in `src/atlas/types.ts`) — or +`EpisodicCandidateFragmentSchema` when `sourcetype: "episodic"`, which layers +the four episodic-invariant refinements (`needsReview`, `provenance_class`, +`confidence`, `validation_status`) on top of the base. + +See `scripts/atlas-harvest/leaf-prompt.md` for the field-by-field contract and +worked examples. + +## Stem derivation + +The file stem is supplied explicitly via `--stem ` to the +`atlas harvest write-fragment` CLI. When `--stem` is omitted, the stem is +derived from the fragment's canonical-key components — concretely +`claimSlug(::claimSlug(claimSlugHint || title))` +(`claimSlugHint` is optional on `CandidateFragmentSchema`; the CLI falls back +to the fragment `title` when no hint is supplied). 
The stem derivation and +the fragment's `canonical_key` are produced by different functions and yield +different strings — the stem is a filesystem-safe slug, not a copy of the +canonical key. The derivation is still idempotent across runs and two +fragments with the same claim text but different sourcetype/subsystem never +collide. + +## Canonical write boundary + +Only `atlas harvest write-fragment --stdin` writes into this directory in +Phase 0. Direct `fs.writeFile` from leaves is deprecated as of Phase 0 — it +still works (existing leaves are not broken) but it is no longer the supported +write path, and Phase 1 will remove the leaf-side writer entirely. + +The write CLI reads a single fragment JSON from stdin, validates it, and +writes it to `//fragments/.json`. + +## Schema validation + +The CLI Zod-parses the input before writing. Exit-code matrix (spec §4.2.1): + +- `0` — success (fragment written; absolute path printed to stdout) +- `1` — stdin/IO failure (bad JSON, unreadable stdin, write error other than EEXIST) +- `2` — stem collision (file already exists) +- `3` — schema validation failure (base `CandidateFragmentSchema` rejected the input) +- `4` — episodic invariant violation (one of `needsReview`/`provenance_class`/`confidence`/`validation_status` failed the episodic refinement) + +stderr always carries the underlying Zod / IO error message; the exit code +distinguishes the FAILURE CLASS so the caller (leaf adapter, CI gate) can +route accordingly. + +## Atomic create + +The CLI creates fragment files EXCLUSIVELY (the underlying open uses the `wx` +flag). A pre-existing file at the same stem yields exit code 2 (`EEXIST`) and +no write occurs — the prior fragment is never silently overwritten. + +To re-mint a fragment at the same stem, delete the file first (or run with a +fresh `--run-id`). 
diff --git a/scripts/atlas-harvest/blitz-manifest.md b/scripts/atlas-harvest/blitz-manifest.md index cae657f..16cc71c 100644 --- a/scripts/atlas-harvest/blitz-manifest.md +++ b/scripts/atlas-harvest/blitz-manifest.md @@ -34,6 +34,8 @@ runs AFTER the fleet, over the fragments this fleet produces. | `FRAGMENTS_DIR` | Absolute path to `runs//fragments/`. The single write target. | | `AS_OF` | The harvest "as of" calendar date (`YYYY-MM-DD`) stamped into provenance freshness for sources that lack their own date. | +- Phase-0 canonical write path: pipe fragments through `atlas harvest write-fragment --stdin`. See `runs/fragments/README.md` for the on-disk contract. + ## Fragment id convention Each leaf owns a unique, filesystem-safe, deterministic file stem so parallel diff --git a/scripts/atlas-harvest/leaf-prompt.md b/scripts/atlas-harvest/leaf-prompt.md index 25cc2d5..340a522 100644 --- a/scripts/atlas-harvest/leaf-prompt.md +++ b/scripts/atlas-harvest/leaf-prompt.md @@ -88,33 +88,40 @@ Every fragment file is ONE object of this shape: ```jsonc { "sourcetype": "memory | episodic | github-pr | github-issue | notion-doc | linear-doc | agent-doc | derived", - "subsystem": "", // required — must NOT contain ':' (canonical-key delimiter) or '⟦'/'⟧' (approval-marker delimiters); the schema hard-rejects all three - "claimSlugHint": "", // optional - "source_name": "", // required + "subsystem": "", // required — must NOT contain ':' (canonical-key delimiter) or '⟦'/'⟧' (approval-marker delimiters); the schema hard-rejects all three + "claimSlugHint": "", // optional + "source_name": "", // required "repo_url": "", "ref": "", - "title": "", // required - "content": "", // required - "provenance": { // required - "source": "", // required + "title": "", // required + "content": "", // required + "provenance": { + // required + "source": "", // required "url": "", "date": "", "commit": "", "version": "", "validated_against": "", - "classification": { // required — all 7 dims + 
"classification": { + // required — all 7 dims "sensitivity": "public | internal | proprietary | secret", "knowledge_type": "architecture | design-rationale | root-cause | ownership | operational | protocol | security | process | product | gtm | org-culture", - "audience": "", // defaults to "all-staff" + "audience": "", // defaults to "all-staff" "validation_status": "unverified | source-verified | showcase-verified", "confidence": "high | medium | low", "provenance_class": "primary | derived", - "freshness": { "as_of": "YYYY-MM-DD", "re_verify_by": "YYYY-MM-DD (optional)" } - } + "freshness": { + "as_of": "YYYY-MM-DD", + "re_verify_by": "YYYY-MM-DD (optional)", + }, + }, }, - "evidence": [ /* zero or more, kind-discriminated — see below */ ], - "needsReview": false, // episodic ⇒ true - "validationTargets": [ "", "..." ] + "evidence": [ + /* zero or more, kind-discriminated — see below */ + ], + "needsReview": false, // episodic ⇒ true + "validationTargets": ["", "..."], } ``` @@ -143,54 +150,142 @@ Rules the leaf must honor (the adapters enforce these — match them): --- +## Phase-0 schema-enforced write path + + + +In Phase 0 the leaf no longer writes its own JSON file via `fs.writeFile`. The +orchestration shell composes the fragment object (or the harness produces it +via `agent(prompt, {schema})` structured output) and pipes it to the canonical +write CLI: + +``` +echo "$fragment_json" | atlas harvest write-fragment \ + --run-id --runs-dir [--stem ] --stdin +``` + +The CLI validates against `CandidateFragmentSchema` (or +`EpisodicCandidateFragmentSchema` when `sourcetype === "episodic"`, which +layers the four episodic-invariant refinements on top of the base), then +writes the validated fragment EXCLUSIVELY to +`//fragments/.json`. 
`--stem` is OPTIONAL — when +omitted, the CLI derives the stem from the fragment's canonical-key components +(`claimSlug(::claimSlug(claimSlugHint || title))` — +`claimSlugHint` is optional, so the CLI falls back to the fragment `title` +when no hint is supplied) so the same write path remains idempotent across +canonicalization. + +Exit-code matrix (spec §4.2.1): + +- `0` — success (fragment written; absolute path printed to stdout) +- `1` — stdin/IO failure (bad JSON, unreadable stdin, write error other than EEXIST) +- `2` — stem collision (file already exists) +- `3` — schema validation failure (base `CandidateFragmentSchema` rejected the input) +- `4` — episodic invariant violation (one of `needsReview`/`provenance_class`/`confidence`/`validation_status`) + +Phase 0 ships the CLI; the HOW-TO-WRITE-JSON section above is unchanged and +leaves can still emit JSON in their reports the same way. Phase 1 (a separate +PR) rewrites this prompt to auto-generate the schema block from the Zod source +in `src/atlas/types.ts`, so the hand-written field list is no longer the +single source of truth. + +--- + ## Per-family `*Unit` input shapes (what you assemble in STEP 2) These are the exact adapter input shapes (from `src/atlas/adapters/*.ts`). **memory** (`MemoryFileUnit`): + ```jsonc -{ "filename": "memory/feedback_nextjs_bundles_node_modules.md", "contents": "" } +{ + "filename": "memory/feedback_nextjs_bundles_node_modules.md", + "contents": "", +} ``` **github-pr** (`GitHubPullRequestUnit`): + ```jsonc { "kind": "pull_request", "sourceName": "github-pr:CopilotKit/pathfinder#1746", - "repo": { "fullName": "CopilotKit/pathfinder", "cloneUrl": "https://github.com/CopilotKit/pathfinder.git", "defaultBranch": "main" }, - "pullRequest": { "number": 1746, "title": "...", "body": "...", "htmlUrl": "https://github.com/.../pull/1746", - "mergeCommitSha": "...", "baseRef": "main", "headRef": "...", "author": "...", "mergedBy": "..." 
}, - "changedFiles": ["src/db/atlas.ts"], "linkedIssues": ["https://github.com/.../issues/1732"], "reviewThreads": ["..."] + "repo": { + "fullName": "CopilotKit/pathfinder", + "cloneUrl": "https://github.com/CopilotKit/pathfinder.git", + "defaultBranch": "main", + }, + "pullRequest": { + "number": 1746, + "title": "...", + "body": "...", + "htmlUrl": "https://github.com/.../pull/1746", + "mergeCommitSha": "...", + "baseRef": "main", + "headRef": "...", + "author": "...", + "mergedBy": "...", + }, + "changedFiles": ["src/db/atlas.ts"], + "linkedIssues": ["https://github.com/.../issues/1732"], + "reviewThreads": ["..."], } ``` **github-issue** (`GitHubIssueUnit`): + ```jsonc { "kind": "issue", "sourceName": "github-issue:CopilotKit/pathfinder#1732", "repo": { "fullName": "...", "cloneUrl": "...", "defaultBranch": "main" }, - "issue": { "number": 1732, "title": "...", "body": "...", "htmlUrl": "...", "author": "...", "state": "closed" }, - "linkedIssues": [], "reviewThreads": [] + "issue": { + "number": 1732, + "title": "...", + "body": "...", + "htmlUrl": "...", + "author": "...", + "state": "closed", + }, + "linkedIssues": [], + "reviewThreads": [], } ``` **notion-doc** (`NotionPageUnit`): + ```jsonc { - "url": "https://www.notion.so/...", "title": "Interrupts Proposal — Design Decisions", - "subsystem": "agui-protocol", "repo_url": "", "ref": "", "date": "2026-05-20", - "sections": [ { "heading": "Decision 1: Resume tokens are opaque", "body": "..." }, { "heading": "Context", "body": "..." } ] + "url": "https://www.notion.so/...", + "title": "Interrupts Proposal — Design Decisions", + "subsystem": "agui-protocol", + "repo_url": "", + "ref": "", + "date": "2026-05-20", + "sections": [ + { "heading": "Decision 1: Resume tokens are opaque", "body": "..." }, + { "heading": "Context", "body": "..." }, + ], } ``` + (The adapter splits on decision headings: `Decision …`, `ADR …`, `N. …`. Non-decision sections like Context are page-level only.) 
**linear-doc** (`LinearDocUnit`): + ```jsonc { - "url": "https://linear.app/...", "title": "...", "problem": "...", "why": "...", - "nonGoals": ["..."], "citedFiles": ["src/..."], "notionCrossLink": "", - "subsystem": "runtime", "area": "", "updatedAt": "2026-05-30", "knowledgeType": "ownership" + "url": "https://linear.app/...", + "title": "...", + "problem": "...", + "why": "...", + "nonGoals": ["..."], + "citedFiles": ["src/..."], + "notionCrossLink": "", + "subsystem": "runtime", + "area": "", + "updatedAt": "2026-05-30", + "knowledgeType": "ownership", } ``` @@ -198,25 +293,49 @@ These are the exact adapter input shapes (from `src/atlas/adapters/*.ts`). invariants (`needsReview: true`, `validation_status: "unverified"`, `provenance_class: "derived"`, `confidence: "low"` clamped, `sensitivity` floored at `"internal"` preserving any stronger signal): + ```jsonc -{ "convPath": "", "date": "2026-06-07", "text": "", "subsystem": "" } +{ + "convPath": "", + "date": "2026-06-07", + "text": "", + "subsystem": "", +} ``` **agent-doc / source-comment** (`SourceCommentUnit`): + ```jsonc { "filePath": "packages/react-core/src/use-coagent-state-render-bridge.tsx", - "lineStart": 24, "lineEnd": 45, - "commentText": "", "codeRegion": "", - "subsystem": "react-core", "repoUrl": "", "ref": "", "sourceUrl": "" + "lineStart": 24, + "lineEnd": 45, + "commentText": "", + "codeRegion": "", + "subsystem": "react-core", + "repoUrl": "", + "ref": "", + "sourceUrl": "", } ``` **derived / showcase** (`ShowcaseUnit`): + ```jsonc { - "manifest": { "integration": "langgraph-python", "name": "LangGraph (Python)", "repo_url": "", "description": "", "features": ["agentic-chat", "gen-ui"] }, - "registry": { "version": "1", "categories": [ { "id": "...", "pills": [ { "id": "agentic-chat", "status": "green" } ] } ] } + "manifest": { + "integration": "langgraph-python", + "name": "LangGraph (Python)", + "repo_url": "", + "description": "", + "features": ["agentic-chat", "gen-ui"], + }, + 
"registry": { + "version": "1", + "categories": [ + { "id": "...", "pills": [{ "id": "agentic-chat", "status": "green" }] }, + ], + }, } ``` @@ -290,8 +409,14 @@ symbol as a `validationTarget`: } }, "evidence": [ - { "kind": "changed_file", "path": "packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45" }, - { "kind": "fused_from", "ref": "source-comment:packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45" } + { + "kind": "changed_file", + "path": "packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45" + }, + { + "kind": "fused_from", + "ref": "source-comment:packages/react-core/src/use-coagent-state-render-bridge.tsx:24-45" + } ], "needsReview": false, "validationTargets": ["useCoagentStateRenderBridge"]