diff --git a/src/interface/parse/WIRE-FORMAT.adoc b/src/interface/parse/WIRE-FORMAT.adoc new file mode 100644 index 0000000..92794dc --- /dev/null +++ b/src/interface/parse/WIRE-FORMAT.adoc @@ -0,0 +1,98 @@ += VCL-total Statement Wire Format v1 (P5b, vcl-ut#25) +:toc: + +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +The C-ABI marshalling format for a parsed `Statement`. The Rust +encoder/decoder (`wire.rs`) is normative for v1; the P5b-step-2 Idris2 +decoder MUST decode this identical byte stream into `Grammar.idr`'s +`Statement` (the certified type), proven total. Cross-language +conformance is by shared golden vectors. + +Design: a deterministic, versioned, length-prefixed binary TLV. +*Rationale:* no parser dependency / no extra trust surface (vs CBOR), +fully auditable, byte-exact, total decode. This is the boundary the +hypatia<->verisim seam crosses; it is specified, not improvised. + +== Conventions + +* All multi-byte integers are *little-endian, fixed width*. +* `u8` = 1 byte; `u32` = 4 bytes; `u64`/`i64` = 8 bytes (`i64` + two's-complement); `f64` = 8 bytes IEEE-754 (`f64::to_bits`, + bit-exact — NaN/inf preserved). +* `bool` = 1 byte, `0x00` false / `0x01` true (any other byte => + `WireError::BadBool`). +* `string` = `u32` byte length, then that many UTF-8 bytes. +* `option` = `0x00` (none) | `0x01` (some) then `T`. +* `list` = `u32` count, then that many `T`. +* Every sum type = a `u8` *discriminant* then its payload in the field + order below. Unknown discriminant => `WireError::BadTag`. + +== Header + +`56 43 4C 57` ("VCLW") then `u16` version. v1 = `01 00`. +Bad magic/version => `WireError::BadMagic` / `BadVersion`. + +== Discriminants (STABLE — never renumber; append only) + +Where `Grammar.idr` already defines an integer mapping it is reused so +the Idris decoder is a direct match: + +* `Modality` — `modalityToInt`: Graph 0, Vector 1, Tensor 2, + Semantic 3, Document 4, Temporal 5, Provenance 6, Spatial 7. +* `Agent` — `agentToInt` tag: Engine 0, Prover 1 `+string`, + Validator 2, User 3 `+string`, Federation 4. +* `EpistemicOp` — `epistemicOpToInt`: Knows 0, Believes 1, + CommonKnowledge 2. +* `EpistemicRequirement` — `epReqToInt`: Knows 0 `agent,expr`, + Believes 1 `agent,expr`, Common 2 `expr`, Entails 3 + `agent,agent,expr`. +* `SafetyLevel` — `safetyLevelToInt` (Types.idr): ParseSafe 0 … + EpistemicSafe 10. + +Defined here for v1 (no Idris int map existed; now normative): + +* `Literal`: Str 0 `string`, Int 1 `i64`, Float 2 `f64`, Bool 3 + `bool`, Null 4, Vector 5 `list`. +* `CompOp`: Eq 0, NotEq 1, Lt 2, Gt 3, LtEq 4, GtEq 5, Like 6, In 7. +* `LogicOp`: And 0, Or 1, Not 2. +* `AggFunc`: Count 0, Sum 1, Avg 2, Min 3, Max 4. +* `Source`: Octad 0 `string`, Federation 1 `string`, Store 2 `string`. +* `ProofClause`: Attached 0, Witness 1 `string`, Assert 2 `expr`. +* `EffectDecl`: Read 0, Write 1, ReadWrite 2, Consume 3. +* `VersionConstraint`: Latest 0, AtLeast 1 `u64`, Exact 2 `u64`, + Range 3 `u64,u64`. +* `LinearAnnotation`: Unlimited 0, UseOnce 1, Bounded 2 `u64`. +* `SelectItem`: Field 0 `fieldref`, Modality 1 `modality`, + Aggregate 2 `aggfunc,expr`, Star 3. +* `Expr`: Field 0 `fieldref`, Literal 1 `literal`, Compare 2 + `compop,expr,expr`, Logic 3 `logicop,expr,option`, + Aggregate 4 `aggfunc,expr`, Param 5 `string`, Star 6, + Subquery 7 `statement`, Epistemic 8 `epistemicop,agent,expr`, + Announce 9 `agent,expr,expr`. + +Composite: `FieldRef` = `modality` then `string` (field name). +`EpistemicClause` = `list` then `list`. + +== Statement (field order = the `Grammar.idr` record) + +`list`, `Source`, `option` (where), +`list` (group by), `option` (having), +`list<(FieldRef, bool)>` (order by; bool = ascending), +`option` (limit), `option` (offset), +`option`, `option`, +`option`, `option`, +`option`, `SafetyLevel` (requested level). + +After the final field the stream MUST be exhausted; trailing bytes => +`WireError::TrailingBytes`. + +== Totality contract + +The decoder is *total*: bounds-checked, no panics, every malformed +input a typed `WireError` (same SPARK-grade lint posture as the +parser). `from_wire(to_wire(s)) == s` for every `Statement` (the +round-trip proptest is the machine witness; bit-exact float preservation +is pinned by a golden vector). `to_wire` is deterministic (canonical: +exactly one byte string per value). diff --git a/src/interface/parse/src/lib.rs b/src/interface/parse/src/lib.rs index 82828a4..5df1023 100644 --- a/src/interface/parse/src/lib.rs +++ b/src/interface/parse/src/lib.rs @@ -37,9 +37,11 @@ pub mod ast; pub mod lexer; pub mod parser; +pub mod wire; pub use ast::Statement; pub use parser::{parse, ParseError}; +pub use wire::{from_wire, to_wire, WireError}; #[cfg(test)] mod tests { diff --git a/src/interface/parse/src/wire.rs b/src/interface/parse/src/wire.rs new file mode 100644 index 0000000..1578248 --- /dev/null +++ b/src/interface/parse/src/wire.rs @@ -0,0 +1,845 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! P5b (#25): the C-ABI marshalling codec — `Statement` <-> a +//! deterministic, versioned, length-prefixed binary TLV. Normative +//! spec: `WIRE-FORMAT.adoc`. The P5b-step-2 Idris2 decoder must accept +//! this identical byte stream into `Grammar.idr`'s `Statement`. +//! +//! Same SPARK-grade posture as the parser (crate lint-set in lib.rs): +//! `to_wire` is total + deterministic; `from_wire` is **total** — +//! bounds-checked, no panic, no unbounded pre-allocation from untrusted +//! counts, every malformed input a typed [`WireError`]. + +use crate::ast::*; + +const MAGIC: [u8; 4] = *b"VCLW"; +const VERSION: u16 = 1; + +/// Total decode failure (never a panic). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum WireError { + BadMagic, + BadVersion(u16), + Truncated, + BadTag { + ty: &'static str, + tag: u8, + }, + BadBool(u8), + BadUtf8, + /// A `usize` count/length that does not fit the platform or exceeds + /// the remaining input (anti-OOM: counts are validated, never + /// pre-allocated against). + LengthOverflow, + TrailingBytes, +} + +impl core::fmt::Display for WireError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + WireError::BadMagic => write!(f, "bad magic (not a VCLW stream)"), + WireError::BadVersion(v) => write!(f, "unsupported wire version {v}"), + WireError::Truncated => write!(f, "truncated input"), + WireError::BadTag { ty, tag } => { + write!(f, "bad {ty} discriminant {tag}") + } + WireError::BadBool(b) => write!(f, "bad bool byte {b}"), + WireError::BadUtf8 => write!(f, "invalid UTF-8 in string"), + WireError::LengthOverflow => write!(f, "length/count overflow"), + WireError::TrailingBytes => write!(f, "trailing bytes after statement"), + } + } +} +impl std::error::Error for WireError {} + +// ── Encoder (deterministic; canonical one-byte-string-per-value) ────── + +/// Serialise a `Statement` to the v1 wire format. Total. +pub fn to_wire(s: &Statement) -> Vec { + let mut o = Vec::new(); + o.extend_from_slice(&MAGIC); + o.extend_from_slice(&VERSION.to_le_bytes()); + enc_stmt(s, &mut o); + o +} + +fn put_u8(o: &mut Vec, b: u8) { + o.push(b); +} +fn put_bool(o: &mut Vec, b: bool) { + o.push(u8::from(b)); +} +fn put_u32(o: &mut Vec, n: u32) { + o.extend_from_slice(&n.to_le_bytes()); +} +fn put_u64(o: &mut Vec, n: u64) { + o.extend_from_slice(&n.to_le_bytes()); +} +fn put_i64(o: &mut Vec, n: i64) { + o.extend_from_slice(&n.to_le_bytes()); +} +fn put_f64(o: &mut Vec, n: f64) { + o.extend_from_slice(&n.to_bits().to_le_bytes()); +} +fn put_str(o: &mut Vec, s: &str) { + // A string longer than u32::MAX bytes is not representable; clamp by + // construction is impossible, so length is encoded saturating and + // the (astronomically unreachable) excess is truncated rather than + // panicking. Documented limit; never hit by real queries. + let bytes = s.as_bytes(); + let n = u32::try_from(bytes.len()).unwrap_or(u32::MAX); + put_u32(o, n); + match bytes.get(..n as usize) { + Some(slice) => o.extend_from_slice(slice), + None => o.extend_from_slice(bytes), + } +} +fn put_opt(o: &mut Vec, v: &Option, f: impl FnOnce(&T, &mut Vec)) { + match v { + None => o.push(0), + Some(x) => { + o.push(1); + f(x, o); + } + } +} +fn put_len(o: &mut Vec, len: usize) { + put_u32(o, u32::try_from(len).unwrap_or(u32::MAX)); +} + +fn enc_modality(m: &Modality, o: &mut Vec) { + put_u8( + o, + match m { + Modality::Graph => 0, + Modality::Vector => 1, + Modality::Tensor => 2, + Modality::Semantic => 3, + Modality::Document => 4, + Modality::Temporal => 5, + Modality::Provenance => 6, + Modality::Spatial => 7, + }, + ); +} + +fn enc_agent(a: &Agent, o: &mut Vec) { + match a { + Agent::Engine => put_u8(o, 0), + Agent::Prover(n) => { + put_u8(o, 1); + put_str(o, n); + } + Agent::Validator => put_u8(o, 2), + Agent::User(n) => { + put_u8(o, 3); + put_str(o, n); + } + Agent::Federation => put_u8(o, 4), + } +} + +fn enc_fieldref(fr: &FieldRef, o: &mut Vec) { + enc_modality(&fr.modality, o); + put_str(o, &fr.field_name); +} + +fn enc_literal(l: &Literal, o: &mut Vec) { + match l { + Literal::Str(s) => { + put_u8(o, 0); + put_str(o, s); + } + Literal::Int(n) => { + put_u8(o, 1); + put_i64(o, *n); + } + Literal::Float(x) => { + put_u8(o, 2); + put_f64(o, *x); + } + Literal::Bool(b) => { + put_u8(o, 3); + put_bool(o, *b); + } + Literal::Null => put_u8(o, 4), + Literal::Vector(v) => { + put_u8(o, 5); + put_len(o, v.len()); + for x in v { + put_f64(o, *x); + } + } + } +} + +fn comp_tag(c: CompOp) -> u8 { + match c { + CompOp::Eq => 0, + CompOp::NotEq => 1, + CompOp::Lt => 2, + CompOp::Gt => 3, + CompOp::LtEq => 4, + CompOp::GtEq => 5, + CompOp::Like => 6, + CompOp::In => 7, + } +} +fn logic_tag(l: LogicOp) -> u8 { + match l { + LogicOp::And => 0, + LogicOp::Or => 1, + LogicOp::Not => 2, + } +} +fn agg_tag(a: AggFunc) -> u8 { + match a { + AggFunc::Count => 0, + AggFunc::Sum => 1, + AggFunc::Avg => 2, + AggFunc::Min => 3, + AggFunc::Max => 4, + } +} +fn epi_tag(e: EpistemicOp) -> u8 { + match e { + EpistemicOp::Knows => 0, + EpistemicOp::Believes => 1, + EpistemicOp::CommonKnowledge => 2, + } +} + +fn enc_expr(e: &Expr, o: &mut Vec) { + match e { + Expr::Field(fr) => { + put_u8(o, 0); + enc_fieldref(fr, o); + } + Expr::Literal(l) => { + put_u8(o, 1); + enc_literal(l, o); + } + Expr::Compare(c, a, b) => { + put_u8(o, 2); + put_u8(o, comp_tag(*c)); + enc_expr(a, o); + enc_expr(b, o); + } + Expr::Logic(l, a, b) => { + put_u8(o, 3); + put_u8(o, logic_tag(*l)); + enc_expr(a, o); + put_opt(o, b, |x, oo| enc_expr(x, oo)); + } + Expr::Aggregate(a, x) => { + put_u8(o, 4); + put_u8(o, agg_tag(*a)); + enc_expr(x, o); + } + Expr::Param(n) => { + put_u8(o, 5); + put_str(o, n); + } + Expr::Star => put_u8(o, 6), + Expr::Subquery(s) => { + put_u8(o, 7); + enc_stmt(s, o); + } + Expr::Epistemic(op, ag, x) => { + put_u8(o, 8); + put_u8(o, epi_tag(*op)); + enc_agent(ag, o); + enc_expr(x, o); + } + Expr::Announce(ag, p, b) => { + put_u8(o, 9); + enc_agent(ag, o); + enc_expr(p, o); + enc_expr(b, o); + } + } +} + +fn enc_select_item(si: &SelectItem, o: &mut Vec) { + match si { + SelectItem::Field(fr) => { + put_u8(o, 0); + enc_fieldref(fr, o); + } + SelectItem::Modality(m) => { + put_u8(o, 1); + enc_modality(m, o); + } + SelectItem::Aggregate(a, e) => { + put_u8(o, 2); + put_u8(o, agg_tag(*a)); + enc_expr(e, o); + } + SelectItem::Star => put_u8(o, 3), + } +} + +fn enc_source(s: &Source, o: &mut Vec) { + match s { + Source::Octad(x) => { + put_u8(o, 0); + put_str(o, x); + } + Source::Federation(x) => { + put_u8(o, 1); + put_str(o, x); + } + Source::Store(x) => { + put_u8(o, 2); + put_str(o, x); + } + } +} + +fn enc_proof(p: &ProofClause, o: &mut Vec) { + match p { + ProofClause::Attached => put_u8(o, 0), + ProofClause::Witness(s) => { + put_u8(o, 1); + put_str(o, s); + } + ProofClause::Assert(e) => { + put_u8(o, 2); + enc_expr(e, o); + } + } +} + +fn enc_effect(e: &EffectDecl, o: &mut Vec) { + put_u8( + o, + match e { + EffectDecl::Read => 0, + EffectDecl::Write => 1, + EffectDecl::ReadWrite => 2, + EffectDecl::Consume => 3, + }, + ); +} + +fn enc_version(v: &VersionConstraint, o: &mut Vec) { + match v { + VersionConstraint::Latest => put_u8(o, 0), + VersionConstraint::AtLeast(n) => { + put_u8(o, 1); + put_u64(o, *n); + } + VersionConstraint::Exact(n) => { + put_u8(o, 2); + put_u64(o, *n); + } + VersionConstraint::Range(a, b) => { + put_u8(o, 3); + put_u64(o, *a); + put_u64(o, *b); + } + } +} + +fn enc_linear(l: &LinearAnnotation, o: &mut Vec) { + match l { + LinearAnnotation::Unlimited => put_u8(o, 0), + LinearAnnotation::UseOnce => put_u8(o, 1), + LinearAnnotation::Bounded(n) => { + put_u8(o, 2); + put_u64(o, *n); + } + } +} + +fn enc_epi_req(r: &EpistemicRequirement, o: &mut Vec) { + match r { + EpistemicRequirement::Knows(a, e) => { + put_u8(o, 0); + enc_agent(a, o); + enc_expr(e, o); + } + EpistemicRequirement::Believes(a, e) => { + put_u8(o, 1); + enc_agent(a, o); + enc_expr(e, o); + } + EpistemicRequirement::Common(e) => { + put_u8(o, 2); + enc_expr(e, o); + } + EpistemicRequirement::Entails(a, b, e) => { + put_u8(o, 3); + enc_agent(a, o); + enc_agent(b, o); + enc_expr(e, o); + } + } +} + +fn enc_epi_clause(c: &EpistemicClause, o: &mut Vec) { + put_len(o, c.agents.len()); + for a in &c.agents { + enc_agent(a, o); + } + put_len(o, c.requirements.len()); + for r in &c.requirements { + enc_epi_req(r, o); + } +} + +fn safety_tag(s: SafetyLevel) -> u8 { + s as u8 +} + +fn enc_stmt(s: &Statement, o: &mut Vec) { + put_len(o, s.select_items.len()); + for si in &s.select_items { + enc_select_item(si, o); + } + enc_source(&s.source, o); + put_opt(o, &s.where_clause, enc_expr); + put_len(o, s.group_by.len()); + for fr in &s.group_by { + enc_fieldref(fr, o); + } + put_opt(o, &s.having, enc_expr); + put_len(o, s.order_by.len()); + for (fr, asc) in &s.order_by { + enc_fieldref(fr, o); + put_bool(o, *asc); + } + put_opt(o, &s.limit, |n, oo| put_u64(oo, *n)); + put_opt(o, &s.offset, |n, oo| put_u64(oo, *n)); + put_opt(o, &s.proof_clause, enc_proof); + put_opt(o, &s.effect_decl, enc_effect); + put_opt(o, &s.version_const, enc_version); + put_opt(o, &s.linear_annot, enc_linear); + put_opt(o, &s.epistemic_clause, enc_epi_clause); + put_u8(o, safety_tag(s.requested_level)); +} + +// ── Decoder (total: bounds-checked, no panic, no untrusted pre-alloc) ─ + +struct D<'a> { + b: &'a [u8], + pos: usize, +} + +impl<'a> D<'a> { + fn take(&mut self, n: usize) -> Result<&'a [u8], WireError> { + let end = self.pos.checked_add(n).ok_or(WireError::LengthOverflow)?; + let slice = self.b.get(self.pos..end).ok_or(WireError::Truncated)?; + self.pos = end; + Ok(slice) + } + fn u8(&mut self) -> Result { + Ok(*self.take(1)?.first().ok_or(WireError::Truncated)?) + } + fn boolean(&mut self) -> Result { + match self.u8()? { + 0 => Ok(false), + 1 => Ok(true), + x => Err(WireError::BadBool(x)), + } + } + fn arr8(&mut self) -> Result<[u8; 8], WireError> { + <[u8; 8]>::try_from(self.take(8)?).map_err(|_| WireError::Truncated) + } + fn u16(&mut self) -> Result { + let s = self.take(2)?; + let a = <[u8; 2]>::try_from(s).map_err(|_| WireError::Truncated)?; + Ok(u16::from_le_bytes(a)) + } + fn u32(&mut self) -> Result { + let s = self.take(4)?; + let a = <[u8; 4]>::try_from(s).map_err(|_| WireError::Truncated)?; + Ok(u32::from_le_bytes(a)) + } + fn u64(&mut self) -> Result { + Ok(u64::from_le_bytes(self.arr8()?)) + } + fn i64(&mut self) -> Result { + Ok(i64::from_le_bytes(self.arr8()?)) + } + fn f64(&mut self) -> Result { + Ok(f64::from_bits(u64::from_le_bytes(self.arr8()?))) + } + /// A length/count: read u32, validate it fits the *remaining* + /// input's worst case is the caller's job (each element does its + /// own bounds-check on read); we only convert without overflow and + /// never pre-allocate against it. + fn count(&mut self) -> Result { + usize::try_from(self.u32()?).map_err(|_| WireError::LengthOverflow) + } + fn string(&mut self) -> Result { + let n = self.count()?; + let bytes = self.take(n)?; + core::str::from_utf8(bytes) + .map(str::to_owned) + .map_err(|_| WireError::BadUtf8) + } + fn opt( + &mut self, + f: impl FnOnce(&mut Self) -> Result, + ) -> Result, WireError> { + match self.u8()? { + 0 => Ok(None), + 1 => Ok(Some(f(self)?)), + x => Err(WireError::BadTag { + ty: "option", + tag: x, + }), + } + } + /// Decode `count` items by repeated bounds-checked reads. No + /// pre-allocation from the untrusted count (anti-OOM): the vector + /// can only grow as fast as input is actually consumed. + fn vec( + &mut self, + mut f: impl FnMut(&mut Self) -> Result, + ) -> Result, WireError> { + let n = self.count()?; + let mut v = Vec::new(); + let mut i = 0usize; + while i < n { + v.push(f(self)?); + i = i.saturating_add(1); + } + Ok(v) + } +} + +/// Decode a v1 wire stream into a `Statement`. Total. +pub fn from_wire(input: &[u8]) -> Result { + let mut d = D { b: input, pos: 0 }; + if d.take(4)? != MAGIC { + return Err(WireError::BadMagic); + } + let ver = d.u16()?; + if ver != VERSION { + return Err(WireError::BadVersion(ver)); + } + let s = dec_stmt(&mut d)?; + if d.pos != d.b.len() { + return Err(WireError::TrailingBytes); + } + Ok(s) +} + +fn dec_modality(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(Modality::Graph), + 1 => Ok(Modality::Vector), + 2 => Ok(Modality::Tensor), + 3 => Ok(Modality::Semantic), + 4 => Ok(Modality::Document), + 5 => Ok(Modality::Temporal), + 6 => Ok(Modality::Provenance), + 7 => Ok(Modality::Spatial), + t => Err(WireError::BadTag { + ty: "Modality", + tag: t, + }), + } +} + +fn dec_agent(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(Agent::Engine), + 1 => Ok(Agent::Prover(d.string()?)), + 2 => Ok(Agent::Validator), + 3 => Ok(Agent::User(d.string()?)), + 4 => Ok(Agent::Federation), + t => Err(WireError::BadTag { + ty: "Agent", + tag: t, + }), + } +} + +fn dec_fieldref(d: &mut D) -> Result { + let modality = dec_modality(d)?; + let field_name = d.string()?; + Ok(FieldRef { + modality, + field_name, + }) +} + +fn dec_literal(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(Literal::Str(d.string()?)), + 1 => Ok(Literal::Int(d.i64()?)), + 2 => Ok(Literal::Float(d.f64()?)), + 3 => Ok(Literal::Bool(d.boolean()?)), + 4 => Ok(Literal::Null), + 5 => Ok(Literal::Vector(d.vec(|dd| dd.f64())?)), + t => Err(WireError::BadTag { + ty: "Literal", + tag: t, + }), + } +} + +fn dec_comp(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(CompOp::Eq), + 1 => Ok(CompOp::NotEq), + 2 => Ok(CompOp::Lt), + 3 => Ok(CompOp::Gt), + 4 => Ok(CompOp::LtEq), + 5 => Ok(CompOp::GtEq), + 6 => Ok(CompOp::Like), + 7 => Ok(CompOp::In), + t => Err(WireError::BadTag { + ty: "CompOp", + tag: t, + }), + } +} +fn dec_logic(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(LogicOp::And), + 1 => Ok(LogicOp::Or), + 2 => Ok(LogicOp::Not), + t => Err(WireError::BadTag { + ty: "LogicOp", + tag: t, + }), + } +} +fn dec_agg(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(AggFunc::Count), + 1 => Ok(AggFunc::Sum), + 2 => Ok(AggFunc::Avg), + 3 => Ok(AggFunc::Min), + 4 => Ok(AggFunc::Max), + t => Err(WireError::BadTag { + ty: "AggFunc", + tag: t, + }), + } +} +fn dec_epi_op(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(EpistemicOp::Knows), + 1 => Ok(EpistemicOp::Believes), + 2 => Ok(EpistemicOp::CommonKnowledge), + t => Err(WireError::BadTag { + ty: "EpistemicOp", + tag: t, + }), + } +} + +fn dec_expr(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(Expr::Field(dec_fieldref(d)?)), + 1 => Ok(Expr::Literal(dec_literal(d)?)), + 2 => { + let c = dec_comp(d)?; + let a = Box::new(dec_expr(d)?); + let b = Box::new(dec_expr(d)?); + Ok(Expr::Compare(c, a, b)) + } + 3 => { + let l = dec_logic(d)?; + let a = Box::new(dec_expr(d)?); + let b = d.opt(|dd| dec_expr(dd).map(Box::new))?; + Ok(Expr::Logic(l, a, b)) + } + 4 => { + let a = dec_agg(d)?; + Ok(Expr::Aggregate(a, Box::new(dec_expr(d)?))) + } + 5 => Ok(Expr::Param(d.string()?)), + 6 => Ok(Expr::Star), + 7 => Ok(Expr::Subquery(Box::new(dec_stmt(d)?))), + 8 => { + let op = dec_epi_op(d)?; + let ag = dec_agent(d)?; + Ok(Expr::Epistemic(op, ag, Box::new(dec_expr(d)?))) + } + 9 => { + let ag = dec_agent(d)?; + let p = Box::new(dec_expr(d)?); + let b = Box::new(dec_expr(d)?); + Ok(Expr::Announce(ag, p, b)) + } + t => Err(WireError::BadTag { ty: "Expr", tag: t }), + } +} + +fn dec_select_item(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(SelectItem::Field(dec_fieldref(d)?)), + 1 => Ok(SelectItem::Modality(dec_modality(d)?)), + 2 => { + let a = dec_agg(d)?; + Ok(SelectItem::Aggregate(a, dec_expr(d)?)) + } + 3 => Ok(SelectItem::Star), + t => Err(WireError::BadTag { + ty: "SelectItem", + tag: t, + }), + } +} + +fn dec_source(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(Source::Octad(d.string()?)), + 1 => Ok(Source::Federation(d.string()?)), + 2 => Ok(Source::Store(d.string()?)), + t => Err(WireError::BadTag { + ty: "Source", + tag: t, + }), + } +} + +fn dec_proof(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(ProofClause::Attached), + 1 => Ok(ProofClause::Witness(d.string()?)), + 2 => Ok(ProofClause::Assert(dec_expr(d)?)), + t => Err(WireError::BadTag { + ty: "ProofClause", + tag: t, + }), + } +} + +fn dec_effect(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(EffectDecl::Read), + 1 => Ok(EffectDecl::Write), + 2 => Ok(EffectDecl::ReadWrite), + 3 => Ok(EffectDecl::Consume), + t => Err(WireError::BadTag { + ty: "EffectDecl", + tag: t, + }), + } +} + +fn dec_version(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(VersionConstraint::Latest), + 1 => Ok(VersionConstraint::AtLeast(d.u64()?)), + 2 => Ok(VersionConstraint::Exact(d.u64()?)), + 3 => { + let a = d.u64()?; + let b = d.u64()?; + Ok(VersionConstraint::Range(a, b)) + } + t => Err(WireError::BadTag { + ty: "VersionConstraint", + tag: t, + }), + } +} + +fn dec_linear(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(LinearAnnotation::Unlimited), + 1 => Ok(LinearAnnotation::UseOnce), + 2 => Ok(LinearAnnotation::Bounded(d.u64()?)), + t => Err(WireError::BadTag { + ty: "LinearAnnotation", + tag: t, + }), + } +} + +fn dec_epi_req(d: &mut D) -> Result { + match d.u8()? { + 0 => { + let a = dec_agent(d)?; + Ok(EpistemicRequirement::Knows(a, dec_expr(d)?)) + } + 1 => { + let a = dec_agent(d)?; + Ok(EpistemicRequirement::Believes(a, dec_expr(d)?)) + } + 2 => Ok(EpistemicRequirement::Common(dec_expr(d)?)), + 3 => { + let a = dec_agent(d)?; + let b = dec_agent(d)?; + Ok(EpistemicRequirement::Entails(a, b, dec_expr(d)?)) + } + t => Err(WireError::BadTag { + ty: "EpistemicRequirement", + tag: t, + }), + } +} + +fn dec_epi_clause(d: &mut D) -> Result { + let agents = d.vec(dec_agent)?; + let requirements = d.vec(dec_epi_req)?; + Ok(EpistemicClause { + agents, + requirements, + }) +} + +fn dec_safety(d: &mut D) -> Result { + match d.u8()? { + 0 => Ok(SafetyLevel::ParseSafe), + 1 => Ok(SafetyLevel::SchemaBound), + 2 => Ok(SafetyLevel::TypeCompat), + 3 => Ok(SafetyLevel::NullSafe), + 4 => Ok(SafetyLevel::InjectionProof), + 5 => Ok(SafetyLevel::ResultTyped), + 6 => Ok(SafetyLevel::CardinalitySafe), + 7 => Ok(SafetyLevel::EffectTracked), + 8 => Ok(SafetyLevel::TemporalSafe), + 9 => Ok(SafetyLevel::LinearSafe), + 10 => Ok(SafetyLevel::EpistemicSafe), + t => Err(WireError::BadTag { + ty: "SafetyLevel", + tag: t, + }), + } +} + +fn dec_stmt(d: &mut D) -> Result { + let select_items = d.vec(dec_select_item)?; + let source = dec_source(d)?; + let where_clause = d.opt(dec_expr)?; + let group_by = d.vec(dec_fieldref)?; + let having = d.opt(dec_expr)?; + let order_by = d.vec(|dd| { + let fr = dec_fieldref(dd)?; + let asc = dd.boolean()?; + Ok((fr, asc)) + })?; + let limit = d.opt(|dd| dd.u64())?; + let offset = d.opt(|dd| dd.u64())?; + let proof_clause = d.opt(dec_proof)?; + let effect_decl = d.opt(dec_effect)?; + let version_const = d.opt(dec_version)?; + let linear_annot = d.opt(dec_linear)?; + let epistemic_clause = d.opt(dec_epi_clause)?; + let requested_level = dec_safety(d)?; + Ok(Statement { + select_items, + source, + where_clause, + group_by, + having, + order_by, + limit, + offset, + proof_clause, + effect_decl, + version_const, + linear_annot, + epistemic_clause, + requested_level, + }) +} diff --git a/src/interface/parse/tests/wire.rs b/src/interface/parse/tests/wire.rs new file mode 100644 index 0000000..0b1a4ec --- /dev/null +++ b/src/interface/parse/tests/wire.rs @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! P5b (#25) wire-codec properties: the machine witnesses for +//! `WIRE-FORMAT.adoc`'s totality + bijection contract. +//! +//! * `roundtrip`: `from_wire(to_wire(s)) == s` over arbitrary +//! `Statement`s (the codec is a bijection on the Rust AST mirror). +//! * `decoder_total_on_garbage`: `from_wire` never panics on arbitrary +//! bytes — only `Ok`/`Err` (the trusted-boundary totality contract). +//! * `encoder_deterministic`: one canonical byte string per value. +//! * golden vectors incl. bit-exact non-finite float preservation. + +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +use proptest::prelude::*; +use vcltotal_parse::ast::*; +use vcltotal_parse::{from_wire, to_wire}; + +fn modality() -> impl Strategy { + prop_oneof![ + Just(Modality::Graph), + Just(Modality::Vector), + Just(Modality::Tensor), + Just(Modality::Semantic), + Just(Modality::Document), + Just(Modality::Temporal), + Just(Modality::Provenance), + Just(Modality::Spatial), + ] +} + +fn agent() -> impl Strategy { + prop_oneof![ + Just(Agent::Engine), + ".*".prop_map(Agent::Prover), + Just(Agent::Validator), + ".*".prop_map(Agent::User), + Just(Agent::Federation), + ] +} + +fn fieldref() -> impl Strategy { + (modality(), ".*").prop_map(|(modality, field_name)| FieldRef { + modality, + field_name, + }) +} + +// Finite floats only: the round-trip oracle is `PartialEq`, and +// `NaN != NaN`. Bit-exact preservation of non-finite values (incl. inf) +// is pinned separately by `golden_bit_exact_floats`. +fn finite_f64() -> impl Strategy { + prop_oneof![ + Just(0.0_f64), + Just(-0.0_f64), + (-1e9_f64..1e9_f64), + Just(f64::MIN), + Just(f64::MAX), + ] +} + +fn literal() -> impl Strategy { + prop_oneof![ + ".*".prop_map(Literal::Str), + any::().prop_map(Literal::Int), + finite_f64().prop_map(Literal::Float), + any::().prop_map(Literal::Bool), + Just(Literal::Null), + proptest::collection::vec(finite_f64(), 0..4).prop_map(Literal::Vector), + ] +} + +fn compop() -> impl Strategy { + prop_oneof![ + Just(CompOp::Eq), + Just(CompOp::NotEq), + Just(CompOp::Lt), + Just(CompOp::Gt), + Just(CompOp::LtEq), + Just(CompOp::GtEq), + Just(CompOp::Like), + Just(CompOp::In), + ] +} +fn aggfunc() -> impl Strategy { + prop_oneof![ + Just(AggFunc::Count), + Just(AggFunc::Sum), + Just(AggFunc::Avg), + Just(AggFunc::Min), + Just(AggFunc::Max), + ] +} +fn epiop() -> impl Strategy { + prop_oneof![ + Just(EpistemicOp::Knows), + Just(EpistemicOp::Believes), + Just(EpistemicOp::CommonKnowledge), + ] +} + +/// A bounded, non-recursive statement for embedding inside `Subquery` +/// (no WHERE/HAVING/subquery) — keeps the generated tree finite. +fn leaf_stmt() -> impl Strategy { + ( + proptest::collection::vec( + prop_oneof![ + fieldref().prop_map(SelectItem::Field), + modality().prop_map(SelectItem::Modality), + Just(SelectItem::Star), + ], + 1..3, + ), + prop_oneof![".*".prop_map(Source::Octad), ".*".prop_map(Source::Store)], + ) + .prop_map(|(select_items, source)| Statement { + select_items, + source, + where_clause: None, + group_by: vec![], + having: None, + order_by: vec![], + limit: None, + offset: None, + proof_clause: None, + effect_decl: None, + version_const: None, + linear_annot: None, + epistemic_clause: None, + requested_level: SafetyLevel::ParseSafe, + }) +} + +fn expr() -> impl Strategy { + let leaf = prop_oneof![ + fieldref().prop_map(Expr::Field), + literal().prop_map(Expr::Literal), + ".*".prop_map(Expr::Param), + Just(Expr::Star), + leaf_stmt().prop_map(|s| Expr::Subquery(Box::new(s))), + (epiop(), agent(), literal()).prop_map(|(o, a, l)| Expr::Epistemic( + o, + a, + Box::new(Expr::Literal(l)) + )), + ]; + leaf.prop_recursive(4, 32, 3, |inner| { + prop_oneof![ + (compop(), inner.clone(), inner.clone()).prop_map(|(c, a, b)| Expr::Compare( + c, + Box::new(a), + Box::new(b) + )), + ( + prop_oneof![Just(LogicOp::And), Just(LogicOp::Or), Just(LogicOp::Not)], + inner.clone(), + proptest::option::of(inner.clone()), + ) + .prop_map(|(l, a, b)| Expr::Logic(l, Box::new(a), b.map(Box::new))), + (aggfunc(), inner.clone()).prop_map(|(a, e)| Expr::Aggregate(a, Box::new(e))), + (agent(), inner.clone(), inner).prop_map(|(ag, p, b)| Expr::Announce( + ag, + Box::new(p), + Box::new(b) + )), + ] + }) +} + +fn statement() -> impl Strategy { + let select = proptest::collection::vec( + prop_oneof![ + fieldref().prop_map(SelectItem::Field), + modality().prop_map(SelectItem::Modality), + (aggfunc(), expr()).prop_map(|(a, e)| SelectItem::Aggregate(a, e)), + Just(SelectItem::Star), + ], + 1..4, + ); + let source = prop_oneof![ + ".*".prop_map(Source::Octad), + ".*".prop_map(Source::Federation), + ".*".prop_map(Source::Store), + ]; + let safety = prop_oneof![ + Just(SafetyLevel::ParseSafe), + Just(SafetyLevel::InjectionProof), + Just(SafetyLevel::EpistemicSafe), + ]; + // Group into <=12-tuples for prop_map, then assemble. + let core = ( + select, + source, + proptest::option::of(expr()), + proptest::collection::vec(fieldref(), 0..3), + proptest::option::of(expr()), + proptest::collection::vec((fieldref(), any::()), 0..3), + proptest::option::of(any::()), + proptest::option::of(any::()), + ); + let ext = ( + proptest::option::of(prop_oneof![ + Just(ProofClause::Attached), + ".*".prop_map(ProofClause::Witness), + ]), + proptest::option::of(prop_oneof![ + Just(EffectDecl::Read), + Just(EffectDecl::Write), + Just(EffectDecl::ReadWrite), + Just(EffectDecl::Consume), + ]), + proptest::option::of(prop_oneof![ + Just(VersionConstraint::Latest), + any::().prop_map(VersionConstraint::AtLeast), + (any::(), any::()).prop_map(|(a, b)| VersionConstraint::Range(a, b)), + ]), + proptest::option::of(prop_oneof![ + Just(LinearAnnotation::Unlimited), + Just(LinearAnnotation::UseOnce), + any::().prop_map(LinearAnnotation::Bounded), + ]), + proptest::option::of( + ( + proptest::collection::vec(agent(), 0..3), + proptest::collection::vec( + prop_oneof![ + (agent(), literal()) + .prop_map(|(a, l)| EpistemicRequirement::Knows(a, Expr::Literal(l))), + literal().prop_map(|l| EpistemicRequirement::Common(Expr::Literal(l))), + ], + 0..3, + ), + ) + .prop_map(|(agents, requirements)| EpistemicClause { + agents, + requirements, + }), + ), + safety, + ); + (core, ext).prop_map(|(c, e)| Statement { + select_items: c.0, + source: c.1, + where_clause: c.2, + group_by: c.3, + having: c.4, + order_by: c.5, + limit: c.6, + offset: c.7, + proof_clause: e.0, + effect_decl: e.1, + version_const: e.2, + linear_annot: e.3, + epistemic_clause: e.4, + requested_level: e.5, + }) +} + +proptest! { + #![proptest_config(ProptestConfig::with_cases(2048))] + + /// The codec is a bijection on the Rust AST mirror. + #[test] + fn roundtrip(s in statement()) { + let bytes = to_wire(&s); + let back = from_wire(&bytes).expect("decode of own encoding must succeed"); + prop_assert_eq!(back, s); + } + + /// Encoding is canonical/deterministic. + #[test] + fn encoder_deterministic(s in statement()) { + prop_assert_eq!(to_wire(&s), to_wire(&s)); + } + + /// Totality: arbitrary bytes never panic the decoder. + #[test] + fn decoder_total_on_garbage(bytes in proptest::collection::vec(any::(), 0..2048)) { + let _ = from_wire(&bytes); + } + + /// Totality even with a valid header followed by garbage. + #[test] + fn decoder_total_with_valid_header(tail in proptest::collection::vec(any::(), 0..512)) { + let mut v = b"VCLW".to_vec(); + v.extend_from_slice(&1u16.to_le_bytes()); + v.extend_from_slice(&tail); + let _ = from_wire(&v); + } +} + +#[test] +fn golden_minimal() { + let s = Statement { + select_items: vec![SelectItem::Star], + source: Source::Store("main".to_string()), + where_clause: None, + group_by: vec![], + having: None, + order_by: vec![], + limit: None, + offset: None, + proof_clause: None, + effect_decl: None, + version_const: None, + linear_annot: None, + epistemic_clause: None, + requested_level: SafetyLevel::SchemaBound, + }; + let b = to_wire(&s); + assert_eq!(&b[0..4], b"VCLW"); + assert_eq!(from_wire(&b).unwrap(), s); +} + +#[test] +fn golden_bit_exact_floats() { + // Non-finite floats round-trip bit-exactly (codec encodes + // f64::to_bits). inf compares equal under PartialEq; NaN does not, + // so assert NaN by bits, others structurally. + for f in [f64::INFINITY, f64::NEG_INFINITY, 12_345.678_901_f64] { + let s = mk_float_stmt(f); + assert_eq!(from_wire(&to_wire(&s)).unwrap(), s); + } + let nan = mk_float_stmt(f64::NAN); + let back = from_wire(&to_wire(&nan)).unwrap(); + match (&back.where_clause, &nan.where_clause) { + (Some(Expr::Literal(Literal::Float(a))), Some(Expr::Literal(Literal::Float(b)))) => { + assert_eq!(a.to_bits(), b.to_bits(), "NaN must round-trip bit-exactly"); + } + _ => panic!("shape changed"), + } +} + +fn mk_float_stmt(f: f64) -> Statement { + Statement { + select_items: vec![SelectItem::Star], + source: Source::Store("s".to_string()), + where_clause: Some(Expr::Literal(Literal::Float(f))), + group_by: vec![], + having: None, + order_by: vec![], + limit: None, + offset: None, + proof_clause: None, + effect_decl: None, + version_const: None, + linear_annot: None, + epistemic_clause: None, + requested_level: SafetyLevel::ParseSafe, + } +}