From 6c821e95d47497369cea958b14576ebf4ede0435 Mon Sep 17 00:00:00 2001 From: hyperpolymath Date: Sun, 17 May 2026 04:38:00 +0100 Subject: [PATCH] =?UTF-8?q?feat(provenance):=20fork-first-class=20chain=20?= =?UTF-8?q?model=20=E2=80=94=20ADR-0010=20(#31;=20supersedes=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements ADR-0010 (now Accepted). The provenance integrity property is tamper-evidence + no-silent-loss, NOT linearity: legitimate divergence (partitioned/replicated/offline writers, simulation branches) must be representable, persisted, detectable and verifiable. Schema - NO `UNIQUE(entity_id, previous_hash)` (#32, superseded): it would reject a divergent writer's honest history at insert time. The `hash` PRIMARY KEY is the correct duplicate guard (domain-tagged preimage). - Add non-unique `idx_provenance_predecessor` → O(log n) fork detection. - `verisimdb_provenance_chain_heads(entity_id, head_hash)` multi-head tip set; legacy single-head table kept one release with an idempotent `INSERT … SELECT` migration (no destructive DROP ships here). Behaviour - `append_provenance`: linear fast-path — extends the unique head; errors (not silently collapses) if the entity has >1 head. - `append_provenance_fork(from_hash)`: extends a specific ancestor, *adds* a head without removing one → the entity now has ≥2 tips. - `fork_points()`: every predecessor with >1 child. - `verify_chain`: per-branch walk (each tip → genesis) so a forked- but-honest entity verifies true while tampering any branch still fails. codegen `overlay.rs` mirrors the schema (harmful `ux_provenance_chain` removed); its DDL tests inverted to assert the ADR-0010 contract. Tests: `tests/provenance_fork_test.rs` rewritten to the 4 ADR-0010 cases (the previously failing-by-design test now passes) + `exact_duplicate_entry_is_rejected`. Full suite green (107 lib + 9 + 4 + 2, 0 failed). ADR-0010 Proposed → Accepted. Closes #31. 
Supersedes #32 (closed not-planned). Co-Authored-By: Claude Opus 4.7 --- ...0010-provenance-forks-are-first-class.adoc | 6 +- src/codegen/overlay.rs | 82 +++- src/tier1/provenance.rs | 419 ++++++++++++++---- tests/provenance_fork_test.rs | 203 +++++---- 4 files changed, 513 insertions(+), 197 deletions(-) diff --git a/docs/decisions/0010-provenance-forks-are-first-class.adoc b/docs/decisions/0010-provenance-forks-are-first-class.adoc index b8e4852..746927c 100755 --- a/docs/decisions/0010-provenance-forks-are-first-class.adoc +++ b/docs/decisions/0010-provenance-forks-are-first-class.adoc @@ -8,7 +8,11 @@ Date: 2026-05-16 ## Status -Proposed (design + failing test; tracks #31 and #32) +Accepted (2026-05-17) — implemented in #31 (multi-head tip set, +fork-aware append, `fork_points`, per-branch `verify_chain`, +non-destructive migration). #32 (the `UNIQUE INDEX(entity_id, +previous_hash)`) is superseded by this ADR and closed not-planned: +the `hash` PRIMARY KEY is the correct duplicate guard. ## Context diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index b14d771..5bf2137 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -185,12 +185,19 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { /// append-only, tamper-evident log (see /// `docs/theory/provenance-threat-model.adoc`). /// -/// The `chain_head` table is the per-entity head pointer used for the -/// write-path lock (V-L2-L1). The UNIQUE INDEX on `(entity_id, -/// previous_hash)` (V-L2-L2) makes chain forks structurally impossible -/// — defence in depth for if the lock is ever bypassed. +/// ADR-0010 (provenance forks are first-class): the `hash` PRIMARY KEY +/// is the duplicate guard (the preimage covers every tamper-relevant +/// field). We deliberately do **not** emit `UNIQUE(entity_id, +/// previous_hash)` (#32, superseded) — that rejects a divergent second +/// writer's legitimate history at insert time. 
Instead a **non-unique** +/// `idx_provenance_predecessor` makes fork *detection* O(log n), and the +/// chain tip is a *set* (`verisimdb_provenance_chain_heads`): one row +/// for a linear entity, several when it has legitimately forked. The +/// legacy single-head `verisimdb_provenance_chain_head` is kept one +/// release for non-destructive migration. Mirrors +/// `tier1::provenance::SIDECAR_DDL` (kept in sync). fn generate_provenance_table() -> String { - "-- Provenance: SHA-256 hash-chained audit trail\n\ + "-- Provenance: SHA-256 hash-chained audit trail (ADR-0010)\n\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_log (\n\ \x20 hash TEXT PRIMARY KEY,\n\ \x20 previous_hash TEXT NOT NULL,\n\ @@ -203,19 +210,25 @@ fn generate_provenance_table() -> String { \x20 transformation TEXT, -- description of transformation applied\n\ \x20 CHECK (operation IN ('insert','update','delete','transform'))\n\ );\n\ - -- V-L2-L2: forbid chain forks at the DB level. Genesis records all\n\ - -- carry previous_hash='' so this also enforces a single genesis per\n\ - -- entity.\n\ - CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain\n\ + -- ADR-0010 #32 (superseded): NO UNIQUE(entity_id, previous_hash) —\n\ + -- a fork that cannot be written cannot be detected or audited. The\n\ + -- non-unique index below makes fork detection O(log n) instead.\n\ + CREATE INDEX IF NOT EXISTS idx_provenance_predecessor\n\ \x20 ON verisimdb_provenance_log(entity_id, previous_hash);\n\ CREATE INDEX IF NOT EXISTS idx_provenance_entity ON verisimdb_provenance_log(entity_id);\n\ CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\ \n\ - -- V-L2-L1: per-entity head pointer. The write path takes a row\n\ - -- lock here (SELECT … FOR UPDATE / BEGIN IMMEDIATE) so concurrent\n\ - -- appenders on the same entity serialise; cross-entity appends\n\ - -- remain parallel. 
Each successful append updates head_hash in\n\ - -- the same transaction as the INSERT into verisimdb_provenance_log.\n\ + -- ADR-0010 #31: chain-tip *set*. `append_provenance` keeps a\n\ + -- BEGIN IMMEDIATE write so racing duplicate appends on one node\n\ + -- still serialise; a linear append swaps its single tip, a\n\ + -- deliberate fork adds a tip without removing one.\n\ + CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_heads (\n\ + \x20 entity_id TEXT NOT NULL,\n\ + \x20 head_hash TEXT NOT NULL,\n\ + \x20 PRIMARY KEY (entity_id, head_hash)\n\ + );\n\ + -- Legacy single-head table: kept one release for non-destructive\n\ + -- migration (see tier1::provenance::SIDECAR_DDL). No DROP ships here.\n\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_head (\n\ \x20 entity_id TEXT PRIMARY KEY,\n\ \x20 head_hash TEXT NOT NULL,\n\ @@ -532,22 +545,47 @@ mod tests { assert!(ddl.contains("actor")); } - /// V-L2-L2: forks are forbidden by a UNIQUE INDEX on - /// (entity_id, previous_hash). + /// ADR-0010 (#32 superseded): forks are first-class. The fork guard + /// is the `hash` PRIMARY KEY (duplicate-rejection); there must be + /// NO `UNIQUE(entity_id, previous_hash)` (it would discard a + /// divergent writer's legitimate history). A *non-unique* + /// predecessor index provides O(log n) fork detection instead. 
#[test] - fn test_provenance_table_has_unique_chain_index() { + fn test_provenance_table_fork_detection_index_is_not_unique() { let ddl = generate_provenance_table(); - assert!(ddl.contains("UNIQUE INDEX")); - assert!(ddl.contains("ux_provenance_chain")); + assert!( + !ddl.contains("ux_provenance_chain"), + "the superseded UNIQUE(entity_id, previous_hash) must not be emitted" + ); + assert!( + !ddl.contains("CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance"), + "no unique provenance-chain index (ADR-0010)" + ); + assert!( + ddl.contains("idx_provenance_predecessor"), + "non-unique fork-detection index must be present" + ); assert!(ddl.contains("(entity_id, previous_hash)")); } - /// V-L2-L1: chain_head table exists for per-entity write serialisation. + /// ADR-0010 #31: the chain tip is a *set* (multi-head); the legacy + /// single-head table is retained one release for migration. #[test] - fn test_provenance_table_has_chain_head() { + fn test_provenance_table_has_multihead_and_legacy_head() { let ddl = generate_provenance_table(); - assert!(ddl.contains("verisimdb_provenance_chain_head")); + assert!( + ddl.contains("verisimdb_provenance_chain_heads"), + "multi-head set table must exist" + ); + assert!( + ddl.contains("verisimdb_provenance_chain_head ("), + "legacy single-head table retained for migration" + ); assert!(ddl.contains("head_hash")); + assert!( + ddl.contains("PRIMARY KEY (entity_id, head_hash)"), + "multi-head table keyed by (entity_id, head_hash)" + ); } #[test] diff --git a/src/tier1/provenance.rs b/src/tier1/provenance.rs index cbe90f3..3bb2f87 100644 --- a/src/tier1/provenance.rs +++ b/src/tier1/provenance.rs @@ -39,16 +39,30 @@ pub use crate::abi::ProvenanceEntry; // SQLite sidecar schema // ========================================================================= -/// SQL DDL for the provenance sidecar schema. +/// SQL DDL for the provenance sidecar schema (ADR-0010: provenance forks +/// are first-class). 
/// -/// Two tables: +/// * `verisimdb_provenance_log` — append-only log of every entry. The +/// `hash` PRIMARY KEY *is* the duplicate guard: the preimage is +/// domain-tagged and covers every tamper-relevant field (ADR-0002 / +/// #27), so an exact-duplicate row necessarily collides on `hash`. +/// We deliberately do **not** add `UNIQUE(entity_id, previous_hash)` +/// (#32, superseded by ADR-0010): that would reject a *divergent* +/// second writer's legitimate history at insert time, making a real +/// fork impossible to record, detect or audit. +/// * `idx_provenance_predecessor` — **non-unique** index making fork +/// *detection* O(log n): two children of one predecessor are two +/// rows sharing `(entity_id, previous_hash)` with distinct `hash`. +/// * `verisimdb_provenance_chain_heads` — the set of live branch tips +/// per entity. One row per entity for a linear chain; several rows +/// when the entity has legitimately forked. +/// * `verisimdb_provenance_chain_head` — the legacy single-head table, +/// kept (non-destructively) one release for migration. New writes go +/// to `_chain_heads`; the `INSERT … SELECT` below copies any legacy +/// heads forward idempotently (no-op on a fresh sidecar). /// -/// * `verisimdb_provenance_log` — append-only log of every entry. -/// Mirrors `codegen/overlay.rs::generate_provenance_table` (kept in -/// sync — see ADR-0008 dialect-split work, #45). -/// * `verisimdb_provenance_chain_head` — per-entity pointer to the -/// tip of its chain, used by `append_provenance` to look up the -/// `previous_hash` in O(1) without scanning the log. +/// Mirrors `codegen/overlay.rs::generate_provenance_table` (kept in +/// sync — see ADR-0008 dialect-split work, #45). 
pub const SIDECAR_DDL: &str = "\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_log (\ hash TEXT PRIMARY KEY,\ @@ -64,10 +78,19 @@ pub const SIDECAR_DDL: &str = "\ );\ CREATE INDEX IF NOT EXISTS idx_provenance_entity ON verisimdb_provenance_log(entity_id);\ CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\ + CREATE INDEX IF NOT EXISTS idx_provenance_predecessor \ + ON verisimdb_provenance_log(entity_id, previous_hash);\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_head (\ entity_id TEXT PRIMARY KEY,\ head_hash TEXT NOT NULL\ - );"; + );\ + CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_heads (\ + entity_id TEXT NOT NULL,\ + head_hash TEXT NOT NULL,\ + PRIMARY KEY (entity_id, head_hash)\ + );\ + INSERT OR IGNORE INTO verisimdb_provenance_chain_heads (entity_id, head_hash) \ + SELECT entity_id, head_hash FROM verisimdb_provenance_chain_head;"; /// Create the provenance sidecar tables in `conn` if they don't already /// exist. Idempotent — safe to call on every open of an existing @@ -99,20 +122,105 @@ pub fn append_provenance( actor: &str, before_snapshot: Option<&str>, transformation: Option<&str>, -) -> rusqlite::Result<String> { +) -> anyhow::Result<String> { + let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?; + + // Linear fast-path: the parent is the entity's *unique* current + // head. Zero heads ⇒ genesis (prev = ""). More than one head ⇒ the + // entity has legitimately forked and there is no single tip to + // extend; the caller must choose a branch via `append_provenance_fork` + // (ADR-0010 §2). 
+ let heads = head_set(&tx, entity_id)?; + let prev_hash: String = match heads.len() { + 0 => String::new(), + 1 => heads[0].clone(), + n => { + return Err(anyhow::anyhow!(format!( + "entity {entity_id:?} has {n} chain heads (forked); linear \ + append is ambiguous — use append_provenance_fork(from_hash) \ + to extend a specific branch (ADR-0010)" + ))); + } + }; + + let timestamp = Utc::now(); + let hash = ProvenanceEntry::compute_hash( + &prev_hash, + entity_id, + operation, + actor, + &timestamp, + before_snapshot, + transformation, + ); + + insert_log_row( + &tx, + &hash, + &prev_hash, + entity_id, + table_name, + operation, + actor, + &timestamp, + before_snapshot, + transformation, + )?; + + // Linear advance: drop the consumed parent tip, add the new tip, so + // a normal append keeps exactly one head. + if !prev_hash.is_empty() { + tx.execute( + "DELETE FROM verisimdb_provenance_chain_heads \ + WHERE entity_id = ?1 AND head_hash = ?2", + params![entity_id, prev_hash], + )?; + } + add_head(&tx, entity_id, &hash)?; + + tx.commit()?; + Ok(hash) +} + +/// Extend the chain of `entity_id` from a *specific ancestor* `from_hash` +/// rather than the current tip — i.e. deliberately record a fork (ADR-0010 +/// §2). This is honest divergent history (partitioned/replicated/offline +/// writers, simulation branches), not tampering. +/// +/// Unlike [`append_provenance`], this *adds* a head without removing one: +/// the entity gains a new branch tip and now has ≥2 heads. `from_hash` +/// must be an existing entry in this entity's log. 
+#[allow(clippy::too_many_arguments)] +pub fn append_provenance_fork( + conn: &mut Connection, + entity_id: &str, + table_name: &str, + operation: &str, + actor: &str, + before_snapshot: Option<&str>, + transformation: Option<&str>, + from_hash: &str, +) -> anyhow::Result<String> { let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?; - let prev_hash: String = tx + let ancestor_exists: bool = tx .query_row( - "SELECT head_hash FROM verisimdb_provenance_chain_head WHERE entity_id = ?1", - [entity_id], - |row| row.get(0), + "SELECT 1 FROM verisimdb_provenance_log \ + WHERE entity_id = ?1 AND hash = ?2", + params![entity_id, from_hash], + |_| Ok(true), ) - .unwrap_or_default(); + .unwrap_or(false); + if !ancestor_exists { + return Err(anyhow::anyhow!(format!( + "from_hash {from_hash:?} is not an entry in entity {entity_id:?}'s \ + chain; cannot fork from a non-existent ancestor" + ))); + } let timestamp = Utc::now(); let hash = ProvenanceEntry::compute_hash( - &prev_hash, + from_hash, entity_id, operation, actor, &timestamp, @@ -121,14 +229,104 @@ pub fn append_provenance( transformation, ); - tx.execute( + insert_log_row( + &tx, + &hash, + from_hash, + entity_id, + table_name, + operation, + actor, + &timestamp, + before_snapshot, + transformation, + )?; + + // A fork *adds* a tip and removes none: the entity now has ≥2 heads. + add_head(&tx, entity_id, &hash)?; + + tx.commit()?; + Ok(hash) +} + +/// A predecessor in `entity_id`'s log that has more than one child — +/// i.e. the point at which the history diverged. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ForkPoint { + /// Hash of the shared predecessor entry (or `""` for a forked genesis). + pub predecessor: String, + /// How many distinct children chain directly from it (always ≥ 2). + pub children: u64, +} + +/// Every fork point in `entity_id`'s history. Empty ⇒ the chain is +/// linear. O(log n) via `idx_provenance_predecessor` (ADR-0010 §1/§3). 
+pub fn fork_points(conn: &Connection, entity_id: &str) -> rusqlite::Result<Vec<ForkPoint>> { + let mut stmt = conn.prepare( + "SELECT previous_hash, COUNT(*) AS c \ + FROM verisimdb_provenance_log \ + WHERE entity_id = ?1 \ + GROUP BY previous_hash HAVING c > 1 \ + ORDER BY previous_hash", + )?; + let rows = stmt.query_map([entity_id], |row| { + Ok(ForkPoint { + predecessor: row.get::<_, String>(0)?, + children: row.get::<_, i64>(1)? as u64, + }) + })?; + rows.collect() +} + +// --- internal helpers ----------------------------------------------------- + +/// The current set of branch-tip hashes for `entity_id`. +fn head_set(conn: &Connection, entity_id: &str) -> rusqlite::Result<Vec<String>> { + let mut stmt = conn.prepare( + "SELECT head_hash FROM verisimdb_provenance_chain_heads WHERE entity_id = ?1", + )?; + let rows = stmt.query_map([entity_id], |r| r.get::<_, String>(0))?; + rows.collect() +} + +/// Add `hash` to the entity's head set (idempotent). Also best-effort +/// updates the legacy single-head table so a one-release-old reader +/// still sees *a* head (it cannot represent the fork, but stays valid). 
+fn add_head(conn: &Connection, entity_id: &str, hash: &str) -> rusqlite::Result<()> { + conn.execute( + "INSERT OR IGNORE INTO verisimdb_provenance_chain_heads (entity_id, head_hash) \ + VALUES (?1, ?2)", + params![entity_id, hash], + )?; + conn.execute( + "INSERT OR REPLACE INTO verisimdb_provenance_chain_head (entity_id, head_hash) \ + VALUES (?1, ?2)", + params![entity_id, hash], + )?; + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +fn insert_log_row( + conn: &Connection, + hash: &str, + previous_hash: &str, + entity_id: &str, + table_name: &str, + operation: &str, + actor: &str, + timestamp: &DateTime<Utc>, + before_snapshot: Option<&str>, + transformation: Option<&str>, +) -> rusqlite::Result<()> { + conn.execute( "INSERT INTO verisimdb_provenance_log \ (hash, previous_hash, entity_id, table_name, operation, actor, timestamp, \ before_snapshot, transformation) \ VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", params![ hash, - prev_hash, + previous_hash, entity_id, table_name, operation, @@ -138,85 +336,104 @@ pub fn append_provenance( transformation, ], )?; - - tx.execute( - "INSERT OR REPLACE INTO verisimdb_provenance_chain_head (entity_id, head_hash) \ - VALUES (?1, ?2)", - params![entity_id, hash], - )?; - - tx.commit()?; - Ok(hash) + Ok(()) } -/// Verify that the chain for `entity_id` is internally consistent. +/// Verify that every branch of `entity_id`'s chain is internally +/// hash-consistent (ADR-0010 §3). /// -/// Walks the log in timestamp order; for each entry, recomputes the -/// hash from its stored fields and checks (a) the recomputed hash -/// matches the stored hash, (b) the `previous_hash` field matches the -/// hash of the prior entry in the walk (or `""` for genesis). +/// A forked entity is **not** a tampered one: linearity is not the +/// integrity property — tamper-evidence and no-silent-loss are. 
So +/// rather than assume a single linear walk, this builds the entity's +/// `hash → entry` map, identifies every branch tip (a hash that is no +/// row's `previous_hash`, unioned with the recorded head set), and +/// walks each tip back to a genesis (`previous_hash == ""`). Each node +/// on every branch must (a) recompute to its stored `hash`, and (b) +/// chain to a present predecessor (or genesis). Shared prefixes are +/// re-checked; correctness over micro-optimisation. /// -/// Returns `Ok(true)` iff the entire chain verifies; `Ok(false)` on the -/// first mismatch. Any SQL error propagates. +/// Returns `Ok(true)` iff *all* branches verify; `Ok(false)` on the +/// first inconsistency. An empty entity verifies vacuously. pub fn verify_chain(conn: &Connection, entity_id: &str) -> rusqlite::Result<bool> { + use std::collections::{HashMap, HashSet}; + + struct Node { + previous_hash: String, + operation: String, + actor: String, + ts_str: String, + before_snapshot: Option<String>, + transformation: Option<String>, + } + let mut stmt = conn.prepare( - "SELECT hash, previous_hash, entity_id, operation, actor, timestamp, \ + "SELECT hash, previous_hash, operation, actor, timestamp, \ before_snapshot, transformation \ - FROM verisimdb_provenance_log \ - WHERE entity_id = ?1 \ - ORDER BY timestamp ASC, hash ASC", + FROM verisimdb_provenance_log WHERE entity_id = ?1", )?; - - let rows = stmt.query_map([entity_id], |row| { + let mut nodes: HashMap<String, Node> = HashMap::new(); + let mut has_child: HashSet<String> = HashSet::new(); + let iter = stmt.query_map([entity_id], |row| { Ok(( row.get::<_, String>(0)?, - row.get::<_, String>(1)?, - row.get::<_, String>(2)?, - row.get::<_, String>(3)?, - row.get::<_, String>(4)?, - row.get::<_, String>(5)?, - row.get::<_, Option<String>>(6)?, - row.get::<_, Option<String>>(7)?, + Node { + previous_hash: row.get::<_, String>(1)?, + operation: row.get::<_, String>(2)?, + actor: row.get::<_, String>(3)?, + ts_str: row.get::<_, String>(4)?, + before_snapshot: row.get::<_, Option<String>>(5)?, + 
transformation: row.get::<_, Option<String>>(6)?, + }, )) })?; - - let mut expected_prev = String::new(); - for row in rows { - let ( - stored_hash, - stored_prev, - entity_id, - operation, - actor, - ts_str, - before_snapshot, - transformation, - ) = row?; - - if stored_prev != expected_prev { - return Ok(false); + for r in iter { + let (hash, node) = r?; + if !node.previous_hash.is_empty() { + has_child.insert(node.previous_hash.clone()); } + nodes.insert(hash, node); + } + if nodes.is_empty() { + return Ok(true); // vacuous + } - let timestamp = match DateTime::parse_from_rfc3339(&ts_str) { - Ok(t) => t.with_timezone(&Utc), - Err(_) => return Ok(false), - }; - - let recomputed = ProvenanceEntry::compute_hash( - &stored_prev, - &entity_id, - &operation, - &actor, - &timestamp, - before_snapshot.as_deref(), - transformation.as_deref(), - ); - - if recomputed != stored_hash { - return Ok(false); + // Tips = recorded heads ∪ any hash nothing chains from. + let mut tips: HashSet<String> = head_set(conn, entity_id)?.into_iter().collect(); + for h in nodes.keys() { + if !has_child.contains(h) { + tips.insert(h.clone()); } + } - expected_prev = stored_hash; + for tip in tips { + let mut cursor = tip; + loop { + let Some(node) = nodes.get(&cursor) else { + // A tip recorded in the head set with no log row, or a + // dangling previous_hash: broken chain. 
+ return Ok(false); + }; + let timestamp = match DateTime::parse_from_rfc3339(&node.ts_str) { + Ok(t) => t.with_timezone(&Utc), + Err(_) => return Ok(false), + }; + let recomputed = ProvenanceEntry::compute_hash( + &node.previous_hash, + entity_id, + &node.operation, + &node.actor, + &timestamp, + node.before_snapshot.as_deref(), + node.transformation.as_deref(), + ); + if recomputed != cursor { + return Ok(false); + } + if node.previous_hash.is_empty() { + break; // reached genesis: this branch is consistent + } + cursor = node.previous_hash.clone(); + } } Ok(true) @@ -249,7 +466,9 @@ mod tests { |r| r.get(0), ) .unwrap(); - assert_eq!(count, 2, "expected 2 provenance tables"); + // log + legacy single-head + multi-head set (ADR-0010 keeps the + // legacy table one release for non-destructive migration). + assert_eq!(count, 3, "expected 3 provenance tables"); } #[test] @@ -268,14 +487,14 @@ mod tests { .unwrap(); assert_eq!(prev, "", "genesis must chain from empty previous_hash"); - let head: String = conn - .query_row( - "SELECT head_hash FROM verisimdb_provenance_chain_head WHERE entity_id='e1'", - [], - |r| r.get(0), - ) - .unwrap(); - assert_eq!(head, hash, "chain head must point at the new entry"); + let heads: Vec<String> = { + let mut s = conn + .prepare("SELECT head_hash FROM verisimdb_provenance_chain_heads WHERE entity_id='e1'") + .unwrap(); + let r = s.query_map([], |x| x.get::<_, String>(0)).unwrap(); + r.collect::<Result<_, _>>().unwrap() + }; + assert_eq!(heads, vec![hash], "genesis must record exactly one head"); } #[test] @@ -297,14 +516,18 @@ mod tests { assert_ne!(h1, h2); assert_ne!(h2, h3); - let head: String = conn - .query_row( - "SELECT head_hash FROM verisimdb_provenance_chain_head WHERE entity_id='e1'", - [], - |r| r.get(0), - ) - .unwrap(); - assert_eq!(head, h3); + let heads: Vec<String> = { + let mut s = conn + .prepare("SELECT head_hash FROM verisimdb_provenance_chain_heads WHERE entity_id='e1'") + .unwrap(); + let r = s.query_map([], |x| x.get::<_, 
String>(0)).unwrap(); + r.collect::<Result<_, _>>().unwrap() + }; + assert_eq!( + heads, + vec![h3.clone()], + "a linear chain advances its single head, never accumulates tips" + ); assert!( verify_chain(&conn, "e1").unwrap(), diff --git a/tests/provenance_fork_test.rs b/tests/provenance_fork_test.rs index 8a4e7da..ab662cb 100755 --- a/tests/provenance_fork_test.rs +++ b/tests/provenance_fork_test.rs @@ -1,34 +1,22 @@ // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) // -// FAILING-BY-DESIGN test for the fork-impossibility defect -// (#31 + #32, see docs/decisions/0010-provenance-forks-are-first-class.adoc). +// ADR-0010 (provenance forks are first-class) acceptance suite for +// #31 (and #32, superseded). Provenance forks — two valid children of +// the same predecessor (network-partitioned honest writers, replicas, +// simulation branches) — must be representable, persisted, detectable, +// and verifiable. The integrity property is tamper-evidence and +// no-silent-loss, NOT linearity. // -// This test encodes the *desired* behaviour: a legitimate provenance -// fork (two valid children of the same predecessor — e.g. two -// network-partitioned honest writers, or a simulation branch) must be -// representable, persisted, and detectable. -// -// It is EXPECTED TO FAIL on `main` today, because: -// * `verisimdb_provenance_chain_head` has `entity_id` as PRIMARY KEY, -// so an entity can only ever record ONE head — the second branch's -// head is silently overwritten (INSERT OR REPLACE). -// * there is no fork-aware append / detection surface. -// * if #32's `UNIQUE INDEX(entity_id, previous_hash)` were applied, -// the second child insert would additionally fail with a -// constraint violation. -// -// The implementing PR for #31/#32 makes this test pass (multi-head -// table + fork-aware append + `fork_points`). Until then it documents -// the defect in executable form. 
-// -// It compiles against the *current* public surface so CI exercises it -// rather than ignoring it; the assertions — not the compile — are what -// fail. +// These tests were failing-by-design on `main`; the #31 implementation +// (multi-head tip set + fork-aware append + fork_points + per-branch +// verify) makes them pass. -use rusqlite::{params, Connection}; +use rusqlite::{Connection, params}; use verisimiser::abi::ProvenanceEntry; -use verisimiser::tier1::provenance::{append_provenance, init_sidecar_schema}; +use verisimiser::tier1::provenance::{ + append_provenance, append_provenance_fork, fork_points, init_sidecar_schema, verify_chain, +}; fn open_sidecar() -> Connection { let conn = Connection::open_in_memory().expect("open in-memory sidecar"); @@ -36,20 +24,17 @@ fn open_sidecar() -> Connection { conn } -/// Count chain heads recorded for an entity. Today this can only ever -/// be 0 or 1 because `entity_id` is the PRIMARY KEY of the head table; -/// the target design records one row per live branch tip. +/// Rows in the multi-head tip set for `entity_id`. fn head_count(conn: &Connection, entity_id: &str) -> i64 { conn.query_row( - "SELECT COUNT(*) FROM verisimdb_provenance_chain_head WHERE entity_id = ?1", + "SELECT COUNT(*) FROM verisimdb_provenance_chain_heads WHERE entity_id = ?1", [entity_id], |r| r.get(0), ) .unwrap_or(0) } -/// Number of rows in the log whose `previous_hash` is `parent` — i.e. -/// how many children that node has. > 1 ==> a fork at `parent`. +/// Children of `parent` (rows whose `previous_hash = parent`). > 1 ⇒ fork. 
fn child_count(conn: &Connection, entity_id: &str, parent: &str) -> i64 { conn.query_row( "SELECT COUNT(*) FROM verisimdb_provenance_log \ @@ -60,64 +45,130 @@ fn child_count(conn: &Connection, entity_id: &str, parent: &str) -> i64 { .unwrap_or(0) } +fn log_count(conn: &Connection, entity_id: &str) -> i64 { + conn.query_row( + "SELECT COUNT(*) FROM verisimdb_provenance_log WHERE entity_id = ?1", + [entity_id], + |r| r.get(0), + ) + .unwrap_or(0) +} + #[test] fn fork_can_be_written_and_both_branches_persist() { let mut conn = open_sidecar(); let entity = "account:42"; - // Genesis + one normal child via the supported linear path. - let genesis = append_provenance( - &mut conn, entity, "accounts", "insert", "alice", None, None, - ) - .expect("genesis append"); - let _branch_a = append_provenance( - &mut conn, entity, "accounts", "update", "alice", None, None, - ) - .expect("branch A append"); - - // A second, legitimate writer (partitioned from the first) extends - // the chain from the SAME genesis tip: a fork. There is no - // supported API for "chain from this specific ancestor" yet, so we - // construct the entry the way the target `append_provenance_fork` - // will and write it directly. The hash is canonical and the row is - // internally valid — it is honest history, not tampering. - let ts = chrono::Utc::now(); - let branch_b_hash = ProvenanceEntry::compute_hash( - &genesis, entity, "update", "bob", &ts, None, None, - ); - conn.execute( - "INSERT INTO verisimdb_provenance_log \ - (hash, previous_hash, entity_id, table_name, operation, actor, \ - timestamp, before_snapshot, transformation) \ - VALUES (?1, ?2, ?3, 'accounts', 'update', 'bob', ?4, NULL, NULL)", - params![branch_b_hash, genesis, entity, ts.to_rfc3339()], - ) - .expect("fork row insert (fails here once #32 unique index is added)"); - - // The target design also records branch B's head. 
Today the head - // table cannot hold two heads for one entity (entity_id is PK), so - // we attempt the insert the implementing PR will do. - let _ = conn.execute( - "INSERT INTO verisimdb_provenance_chain_head (entity_id, head_hash) \ - VALUES (?1, ?2)", - params![entity, branch_b_hash], - ); + // Genesis, then a normal linear child (branch A) off it. + let genesis = + append_provenance(&mut conn, entity, "accounts", "insert", "alice", None, None) + .expect("genesis append"); + let _branch_a = + append_provenance(&mut conn, entity, "accounts", "update", "alice", None, None) + .expect("branch A append (linear, off genesis)"); - // --- Desired-behaviour assertions (expected to FAIL on main) --- + // A second, partitioned-but-honest writer extends the chain from + // the SAME genesis tip — a legitimate fork via the explicit API. + let branch_b = append_provenance_fork( + &mut conn, entity, "accounts", "update", "bob", None, None, &genesis, + ) + .expect("branch B append (fork from genesis)"); - // Both children of genesis must be retained: this is a true fork. + // Genesis must have two children — the fork is representable, not + // silently collapsed. assert_eq!( child_count(&conn, entity, &genesis), 2, - "genesis must have two children (branch A + branch B) — the \ - fork must be representable, not silently collapsed", + "genesis must have two children (branch A + branch B)" ); - - // The entity now has two live branch tips; both must be tracked. + // Three log rows total: genesis, A, B. + assert_eq!(log_count(&conn, entity), 3, "all three entries persist"); + // Two live branch tips, both tracked (linear A-tip + fork B-tip). 
assert_eq!( head_count(&conn, entity), 2, - "a forked entity must record one head per branch; today the \ - single-row-per-entity head table cannot express this (#31)", + "a forked entity records one head per branch" + ); + assert!(!branch_b.is_empty()); +} + +#[test] +fn fork_points_detects_the_divergence() { + let mut conn = open_sidecar(); + let entity = "doc:7"; + + let genesis = append_provenance(&mut conn, entity, "docs", "insert", "a", None, None).unwrap(); + let _a = append_provenance(&mut conn, entity, "docs", "update", "a", None, None).unwrap(); + let _b = append_provenance_fork( + &mut conn, entity, "docs", "update", "b", None, None, &genesis, + ) + .unwrap(); + + let forks = fork_points(&conn, entity).expect("fork_points query"); + assert_eq!(forks.len(), 1, "exactly one divergence point"); + assert_eq!(forks[0].predecessor, genesis, "the fork is at genesis"); + assert_eq!(forks[0].children, 2, "genesis has two children"); + + // A purely linear entity has no fork points. + let mut c2 = open_sidecar(); + append_provenance(&mut c2, "lin:1", "t", "insert", "a", None, None).unwrap(); + append_provenance(&mut c2, "lin:1", "t", "update", "a", None, None).unwrap(); + assert!(fork_points(&c2, "lin:1").unwrap().is_empty()); +} + +#[test] +fn each_branch_verifies_independently() { + let mut conn = open_sidecar(); + let entity = "sim:1"; + + let genesis = append_provenance(&mut conn, entity, "t", "insert", "a", None, None).unwrap(); + append_provenance(&mut conn, entity, "t", "update", "a", None, None).unwrap(); + append_provenance_fork(&mut conn, entity, "t", "transform", "b", None, None, &genesis) + .unwrap(); + + // Divergence is not tampering: every branch is hash-consistent, so + // the forked entity must still verify true. + assert!( + verify_chain(&conn, entity).expect("verify forked entity"), + "a forked-but-honest history must verify (each branch consistent)" + ); + + // Tampering one branch's row must still be caught. 
+ conn.execute( + "UPDATE verisimdb_provenance_log SET actor = 'mallory' \ + WHERE entity_id = ?1 AND actor = 'b'", + [entity], + ) + .unwrap(); + assert!( + !verify_chain(&conn, entity).unwrap(), + "tampering a forked branch must still fail verification" + ); +} + +#[test] +fn exact_duplicate_entry_is_rejected() { + let conn = open_sidecar(); + let entity = "dup:1"; + let ts = chrono::Utc::now(); + let hash = ProvenanceEntry::compute_hash("", entity, "insert", "a", &ts, None, None); + + let insert = |h: &str| { + conn.execute( + "INSERT INTO verisimdb_provenance_log \ + (hash, previous_hash, entity_id, table_name, operation, actor, \ + timestamp, before_snapshot, transformation) \ + VALUES (?1, '', ?2, 't', 'insert', 'a', ?3, NULL, NULL)", + params![h, entity, ts.to_rfc3339()], + ) + }; + + insert(&hash).expect("first insert of a unique entry succeeds"); + // A byte-identical entry has the same domain-tagged hash and so + // collides on the `hash` PRIMARY KEY — the correct duplicate guard + // the superseded UNIQUE index was trying (wrongly) to provide. + assert!( + insert(&hash).is_err(), + "an exact-duplicate entry must be rejected by the hash PK" ); }