From a51e4f8bec88bd2664d46ca76a3efa194bc8103e Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 19 May 2026 14:51:52 +0100 Subject: [PATCH 1/3] vcl-ut #25 P5a: trusted Rust/SPARK-grade String->Statement parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First slice of the boundary-reinforcement workstream (#25). Adds a new workspace crate `src/interface/parse` (`vcltotal-parse`): a TOTAL, panic-free parser for the VCL-total relational core, the trust anchor that will feed the Idris2 certifier across the C-ABI (P5b/P5c). SPARK-grade posture (enforced statically, gated in CI): - #![forbid(unsafe_code)] - #![deny(clippy::{unwrap_used, expect_used, panic, indexing_slicing, arithmetic_side_effects, unreachable, todo, unimplemented, exit})] => no UB, no panics, no unchecked arithmetic; the only arithmetic is cursor saturating_add; every failure is a typed ParseError. - proptest panic-free invariant (4096 cases x 3 strategies incl. arbitrary bytes) = runtime witness of totality. Faithful AST mirror of src/core/Grammar.idr (ast.rs is 1:1 with the Idris constructors so the P5b marshaller is a structural map). Expr VqlType slots deliberately omitted — the parser establishes syntax only; type resolution + the L0-L10 decision stay the certifier's job. Coverage (first slice): SELECT/FROM/WHERE/GROUP BY/HAVING/ORDER BY/ LIMIT/OFFSET, full expression grammar (literals, params, *, fields, aggregates, comparisons, AND/OR/NOT, parens, scalar subqueries). The VCL-total extension clauses (PROOF/EFFECTS/AT VERSION/CONSUME| USAGE/EPISTEMIC) and modal expressions are FAIL-CLOSED: a typed "not yet supported" error, never a silently-wrong AST. Full grammar coverage iterates per the #25 roadmap. requested_level: a documented, conservative P5a policy (max feature-implied level), explicitly NOT a proven fact; authoritative reconciliation with the ReScript bridge is a later #25 iteration. Also: - .github/workflows/parse-gate.yml: hard CI gate (clippy -D warnings over the SPARK lint set + tests/proptest). - root Cargo.toml: add the workspace member. - PROOF-NEEDS.md: correct the ReScript-bridge nit (it is a standalone frontend, NOT on the verified path; the trusted parser is this crate). Reconciles with VERIFICATION-STANCE.adoc. Verified locally: cargo clippy -p vcltotal-parse --all-targets -D warnings (exit 0), cargo test -p vcltotal-parse (11/11), cargo fmt --check (clean). Refs hyperpolymath/vcl-ut#25, hyperpolymath/standards#124. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/parse-gate.yml | 53 +++ Cargo.toml | 1 + PROOF-NEEDS.md | 15 +- src/interface/parse/Cargo.toml | 28 ++ src/interface/parse/src/ast.rs | 228 ++++++++++++ src/interface/parse/src/lexer.rs | 270 +++++++++++++++ src/interface/parse/src/lib.rs | 127 +++++++ src/interface/parse/src/parser.rs | 535 +++++++++++++++++++++++++++++ src/interface/parse/tests/parse.rs | 80 +++++ 9 files changed, 1335 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/parse-gate.yml create mode 100644 src/interface/parse/Cargo.toml create mode 100644 src/interface/parse/src/ast.rs create mode 100644 src/interface/parse/src/lexer.rs create mode 100644 src/interface/parse/src/lib.rs create mode 100644 src/interface/parse/src/parser.rs create mode 100644 src/interface/parse/tests/parse.rs diff --git a/.github/workflows/parse-gate.yml b/.github/workflows/parse-gate.yml new file mode 100644 index 0000000..44881f9 --- /dev/null +++ b/.github/workflows/parse-gate.yml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: PMPL-1.0-or-later +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# parse-gate.yml — Gates the trusted String->Statement parser +# (`vcltotal-parse`, P5a of #25 reinforcing the verified-corpus +# boundary). vcl-ut is the doubly-trusted hypatia<->verisim interface, +# so the boundary parser must stay panic-free / total: clippy's +# SPARK-grade lint set (no unsafe / unwrap / expect / panic / indexing / +# unchecked arithmetic) is a HARD gate, and the proptest panic-free +# invariant must pass. Tracked: hyperpolymath/standards#124, vcl-ut#25. + +name: Parse Gate + +on: + pull_request: + branches: ['**'] + push: + branches: [main, master] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + parse-gate: + name: vcltotal-parse — panic-free / clippy / tests + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # rustup is preinstalled on ubuntu-latest; no third-party action + # (avoids the action-pinning / deprecated-cache hazards). + - name: Pin toolchain + components + run: | + set -euo pipefail + rustup show active-toolchain || rustup default stable + rustup component add clippy + cargo --version && cargo clippy --version + + - name: Clippy — SPARK-grade lint set as a hard gate + run: | + set -euo pipefail + cargo clippy -p vcltotal-parse --all-targets -- -D warnings + + - name: Tests + panic-free proptest invariant + run: | + set -euo pipefail + cargo test -p vcltotal-parse diff --git a/Cargo.toml b/Cargo.toml index 5b986c7..a847712 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,4 +33,5 @@ members = [ "src/interface/lint", "src/interface/lsp", "src/interface/dap", + "src/interface/parse", ] diff --git a/PROOF-NEEDS.md b/PROOF-NEEDS.md index 20ff595..065562b 100644 --- a/PROOF-NEEDS.md +++ b/PROOF-NEEDS.md @@ -33,8 +33,19 @@ - Debug adapter and formatter — lower priority but should preserve query semantics ### ReScript Bridge (src/bridges/) -- `VclTotalParser.res`, `VclTotalBridge.res` — bridge between ReScript frontend and Idris2/Rust core -- Prove: bridge faithfully translates between ReScript and core representations +- `VclTotalParser.res`, `VclTotalBridge.res` — a **standalone ReScript + frontend** (a working recursive-descent parser producing a *ReScript* + AST). It is **not** on the verified path: it does not connect to, or + marshal into, the Idris2 `Statement` the proof corpus certifies, nor + the Rust core. (`VERIFICATION-STANCE.adoc` "no string→`Statement` + parser exists" is precise — it means no parser whose output is the + *certified* Idris2 `Statement`.) +- The *trusted* boundary parser is the Rust/SPARK-grade + `src/interface/parse` crate (`vcltotal-parse`, P5a of vcl-ut#25), + which mirrors `Grammar.idr` and feeds the certifier across the C-ABI. +- Optional future work: prove the ReScript frontend faithfully tracks + the Idris2 grammar (low priority; it is a convenience frontend, not a + trust anchor). ## Recommended Prover diff --git a/src/interface/parse/Cargo.toml b/src/interface/parse/Cargo.toml new file mode 100644 index 0000000..7286666 --- /dev/null +++ b/src/interface/parse/Cargo.toml @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: PMPL-1.0-or-later +[package] +name = "vcltotal-parse" +version = "0.1.0" +edition = "2021" +license = "PMPL-1.0-or-later" +authors = ["Jonathan D.A. Jewell "] +repository = "https://github.com/hyperpolymath/vcl-ut" +description = "P5a (#25): trusted, panic-free String -> Statement parser for VCL-total; faithful Rust mirror of src/core/Grammar.idr, feeding the Idris2 certifier across the C-ABI." + +[lib] +name = "vcltotal_parse" +path = "src/lib.rs" + +[dependencies] + +[dev-dependencies] +proptest = "1" + +# SPARK-grade posture is enforced STATICALLY in lib.rs: +# `#![forbid(unsafe_code)]` + `#![deny(clippy::arithmetic_side_effects, +# unwrap_used, expect_used, panic, indexing_slicing, unreachable, todo, +# unimplemented, exit)]`. `arithmetic_side_effects` rejects every +# unchecked +/-/* on integers at compile time; the only arithmetic in +# the crate is cursor `saturating_add`. The `proptest` panic-free +# invariant (tests/parse.rs, 4096 cases x 3 strategies) is the runtime +# witness. No `[profile]` here: workspace members' profiles are ignored +# by Cargo, so the guarantee is the static lint set, not a build flag. diff --git a/src/interface/parse/src/ast.rs b/src/interface/parse/src/ast.rs new file mode 100644 index 0000000..14f4350 --- /dev/null +++ b/src/interface/parse/src/ast.rs @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! Faithful Rust mirror of the Idris2 query AST in `src/core/Grammar.idr` +//! (the type the proof corpus certifies). One-to-one with the Idris +//! constructors so the P5b C-ABI marshaller is a structural map with no +//! semantic re-interpretation. +//! +//! TYPE ANNOTATIONS ARE DELIBERATELY ABSENT. In `Grammar.idr` every +//! `Expr` carries a `VqlType`, but the grammar comment is explicit that +//! it "initially [is] TAny, resolved during type checking at Level 2+". +//! A parser never resolves types, so encoding the slot here would be +//! dead, misleading data. The P5b marshaller sets every expression's +//! `VqlType` to `TAny`; the Idris certifier resolves it. This keeps the +//! parser's output honest about what parsing establishes (syntax only). + +/// `Grammar.idr`: `data Modality`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Modality { + Graph, + Vector, + Tensor, + Semantic, + Document, + Temporal, + Provenance, + Spatial, +} + +/// `Grammar.idr`: `data Agent`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Agent { + Engine, + Prover(String), + Validator, + User(String), + Federation, +} + +/// `Grammar.idr`: `data Literal`. Vectors keep `f64` (Idris `LitVector` +/// is `List Double`); `LitFloat` is `Double`. +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Str(String), + Int(i64), + Float(f64), + Bool(bool), + Null, + Vector(Vec), +} + +/// `Grammar.idr`: `data CompOp`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompOp { + Eq, + NotEq, + Lt, + Gt, + LtEq, + GtEq, + Like, + In, +} + +/// `Grammar.idr`: `data LogicOp`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LogicOp { + And, + Or, + Not, +} + +/// `Grammar.idr`: `data AggFunc`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AggFunc { + Count, + Sum, + Avg, + Min, + Max, +} + +/// `Grammar.idr`: `data EpistemicOp`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EpistemicOp { + Knows, + Believes, + CommonKnowledge, +} + +/// `Grammar.idr`: `record FieldRef`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FieldRef { + pub modality: Modality, + pub field_name: String, +} + +/// `Grammar.idr`: `data Expr` (the `mutual` block). `VqlType` slots are +/// omitted — see the module note. +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + Field(FieldRef), + Literal(Literal), + Compare(CompOp, Box, Box), + /// `ELogic`: And/Or carry two operands, Not carries one (`None` rhs). + Logic(LogicOp, Box, Option>), + Aggregate(AggFunc, Box), + Param(String), + Star, + Subquery(Box), + Epistemic(EpistemicOp, Agent, Box), + Announce(Agent, Box, Box), +} + +/// `Grammar.idr`: `data SelectItem`. +#[derive(Debug, Clone, PartialEq)] +pub enum SelectItem { + Field(FieldRef), + Modality(Modality), + Aggregate(AggFunc, Expr), + Star, +} + +/// `Grammar.idr`: `data Source`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Source { + Octad(String), + Federation(String), + Store(String), +} + +/// `Grammar.idr`: `data DriftPolicy` (defined in the grammar; not yet a +/// `Statement` field — mirrored for completeness/fidelity). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DriftPolicy { + Strict, + Repair, + Tolerate, + Latest, +} + +/// `Grammar.idr`: `data ProofClause`. +#[derive(Debug, Clone, PartialEq)] +pub enum ProofClause { + Attached, + Witness(String), + Assert(Expr), +} + +/// `Grammar.idr`: `data EffectDecl`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EffectDecl { + Read, + Write, + ReadWrite, + Consume, +} + +/// `Grammar.idr`: `data VersionConstraint` (Idris `Nat` -> `u64`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VersionConstraint { + Latest, + AtLeast(u64), + Exact(u64), + Range(u64, u64), +} + +/// `Grammar.idr`: `data LinearAnnotation` (Idris `Nat` -> `u64`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LinearAnnotation { + Unlimited, + UseOnce, + Bounded(u64), +} + +/// `Grammar.idr`: `data EpistemicRequirement`. +#[derive(Debug, Clone, PartialEq)] +pub enum EpistemicRequirement { + Knows(Agent, Expr), + Believes(Agent, Expr), + Common(Expr), + Entails(Agent, Agent, Expr), +} + +/// `Grammar.idr`: `data EpistemicClause` (`EpClause agents requirements`). +#[derive(Debug, Clone, PartialEq)] +pub struct EpistemicClause { + pub agents: Vec, + pub requirements: Vec, +} + +/// `interface/abi/Types.idr`: `data SafetyLevel` (tags 0..=10, exactly +/// `safetyLevelToInt`). `requested_level` on `Statement` is a stored +/// field there, consumed by `Checker.certifyRequested`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum SafetyLevel { + ParseSafe = 0, + SchemaBound = 1, + TypeCompat = 2, + NullSafe = 3, + InjectionProof = 4, + ResultTyped = 5, + CardinalitySafe = 6, + EffectTracked = 7, + TemporalSafe = 8, + LinearSafe = 9, + EpistemicSafe = 10, +} + +/// `Grammar.idr`: `record Statement` (`orderBy` is `(field, ascending?)`; +/// Idris `Nat` limit/offset -> `u64`). +#[derive(Debug, Clone, PartialEq)] +pub struct Statement { + pub select_items: Vec, + pub source: Source, + pub where_clause: Option, + pub group_by: Vec, + pub having: Option, + pub order_by: Vec<(FieldRef, bool)>, + pub limit: Option, + pub offset: Option, + pub proof_clause: Option, + pub effect_decl: Option, + pub version_const: Option, + pub linear_annot: Option, + pub epistemic_clause: Option, + pub requested_level: SafetyLevel, +} diff --git a/src/interface/parse/src/lexer.rs b/src/interface/parse/src/lexer.rs new file mode 100644 index 0000000..08def6b --- /dev/null +++ b/src/interface/parse/src/lexer.rs @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! Total, panic-free tokenizer. Every code path returns a `Result`; the +//! cursor advances only via `saturating_add` and reads only via +//! `slice::get`, so no input can panic or index out of bounds. + +/// One lexical token. Keyword vs identifier is left to the parser +/// (case-insensitive), so the lexer stays a pure character classifier. +#[derive(Debug, Clone, PartialEq)] +pub enum Tok { + /// Bare word: keyword or identifier (original case preserved). + Word(String), + /// Single-quoted string literal, contents unescaped. + Str(String), + /// Integer literal. + Int(i64), + /// Floating-point literal. + Float(f64), + /// `$name` or `$1` parameter placeholder (name without the `$`). + Param(String), + /// Punctuation / operator symbol, normalised (`<>` -> `!=`). + Sym(&'static str), +} + +/// A token plus its 0-based char start offset (for diagnostics). +#[derive(Debug, Clone, PartialEq)] +pub struct Spanned { + pub tok: Tok, + pub at: usize, +} + +/// Lexing failure with the offending char offset. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LexError { + pub at: usize, + pub msg: String, +} + +struct Cur { + src: Vec, + pos: usize, +} + +impl Cur { + fn peek(&self) -> Option { + self.src.get(self.pos).copied() + } + fn peek2(&self) -> Option { + self.src.get(self.pos.saturating_add(1)).copied() + } + fn bump(&mut self) { + self.pos = self.pos.saturating_add(1); + } +} + +const SYMS_2: [(&str, &str); 3] = [("<=", "<="), (">=", ">="), ("<>", "!=")]; +// The VCL-total alphabet includes `{ } :` (e.g. `EFFECTS { Read }`, +// agent `prover:lean4`). The lexer accepts the whole alphabet; the +// *parser* enforces structure and fail-closes on unsupported clauses. +const SYMS_1: [char; 12] = ['(', ')', ',', '.', '*', ';', '=', '<', '>', '{', '}', ':']; + +/// Tokenize `input`. Returns every token in order; never panics. +pub fn lex(input: &str) -> Result, LexError> { + let mut c = Cur { + src: input.chars().collect(), + pos: 0, + }; + let mut out: Vec = Vec::new(); + + while let Some(ch) = c.peek() { + if ch.is_whitespace() { + c.bump(); + continue; + } + // Line comment: -- ... to end of line. + if ch == '-' && c.peek2() == Some('-') { + while let Some(x) = c.peek() { + if x == '\n' { + break; + } + c.bump(); + } + continue; + } + let at = c.pos; + + if ch == '\'' { + out.push(Spanned { + tok: lex_string(&mut c)?, + at, + }); + continue; + } + if ch == '$' { + out.push(Spanned { + tok: lex_param(&mut c)?, + at, + }); + continue; + } + if ch.is_ascii_digit() { + out.push(Spanned { + tok: lex_number(&mut c)?, + at, + }); + continue; + } + if ch == '_' || ch.is_alphabetic() { + out.push(Spanned { + tok: lex_word(&mut c), + at, + }); + continue; + } + if ch == '!' { + // Only `!=` is meaningful. + if c.peek2() == Some('=') { + c.bump(); + c.bump(); + out.push(Spanned { + tok: Tok::Sym("!="), + at, + }); + continue; + } + return Err(LexError { + at, + msg: "lone '!' (expected '!=')".to_string(), + }); + } + // Two-char symbols before one-char. + if let Some(rest) = c.peek2() { + let pair = [ch, rest].iter().collect::(); + if let Some(found) = SYMS_2.iter().find(|(p, _)| *p == pair) { + c.bump(); + c.bump(); + out.push(Spanned { + tok: Tok::Sym(found.1), + at, + }); + continue; + } + } + if let Some(sym) = SYMS_1.iter().find(|s| **s == ch) { + c.bump(); + // Map char -> &'static str without allocation. + let s: &'static str = match sym { + '(' => "(", + ')' => ")", + ',' => ",", + '.' => ".", + '*' => "*", + ';' => ";", + '=' => "=", + '<' => "<", + '>' => ">", + '{' => "{", + '}' => "}", + ':' => ":", + _ => "", + }; + out.push(Spanned { + tok: Tok::Sym(s), + at, + }); + continue; + } + + return Err(LexError { + at, + msg: format!("unexpected character {ch:?}"), + }); + } + Ok(out) +} + +fn lex_string(c: &mut Cur) -> Result { + let start = c.pos; + c.bump(); // opening quote + let mut buf = String::new(); + loop { + match c.peek() { + None => { + return Err(LexError { + at: start, + msg: "unterminated string literal".to_string(), + }) + } + Some('\'') => { + // '' is an escaped single quote. + if c.peek2() == Some('\'') { + buf.push('\''); + c.bump(); + c.bump(); + continue; + } + c.bump(); + return Ok(Tok::Str(buf)); + } + Some(other) => { + buf.push(other); + c.bump(); + } + } + } +} + +fn lex_param(c: &mut Cur) -> Result { + let at = c.pos; + c.bump(); // '$' + let mut buf = String::new(); + while let Some(x) = c.peek() { + if x == '_' || x.is_alphanumeric() { + buf.push(x); + c.bump(); + } else { + break; + } + } + if buf.is_empty() { + return Err(LexError { + at, + msg: "empty parameter name after '$'".to_string(), + }); + } + Ok(Tok::Param(buf)) +} + +fn lex_number(c: &mut Cur) -> Result { + let at = c.pos; + let mut buf = String::new(); + let mut is_float = false; + while let Some(x) = c.peek() { + if x.is_ascii_digit() { + buf.push(x); + c.bump(); + } else if x == '.' && !is_float && c.peek2().is_some_and(|d| d.is_ascii_digit()) { + is_float = true; + buf.push('.'); + c.bump(); + } else { + break; + } + } + if is_float { + buf.parse::().map(Tok::Float).map_err(|e| LexError { + at, + msg: format!("bad float literal: {e}"), + }) + } else { + buf.parse::().map(Tok::Int).map_err(|e| LexError { + at, + msg: format!("bad integer literal: {e}"), + }) + } +} + +fn lex_word(c: &mut Cur) -> Tok { + let mut buf = String::new(); + while let Some(x) = c.peek() { + if x == '_' || x.is_alphanumeric() { + buf.push(x); + c.bump(); + } else { + break; + } + } + Tok::Word(buf) +} diff --git a/src/interface/parse/src/lib.rs b/src/interface/parse/src/lib.rs new file mode 100644 index 0000000..82828a4 --- /dev/null +++ b/src/interface/parse/src/lib.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! `vcltotal-parse` — P5a of issue #25 (reinforce the verified-corpus +//! boundary). A *trusted* `String -> Statement` parser for VCL-total, +//! held to a SPARK-grade posture so it can sit on the doubly-trusted +//! hypatia<->verisim path: +//! +//! * `#![forbid(unsafe_code)]` — no UB surface. +//! * No `unwrap`/`expect`/`panic`/`unreachable`/`todo`/`unimplemented`, +//! no slice indexing, no unchecked arithmetic (`clippy:: +//! arithmetic_side_effects` denied; the only arithmetic is cursor +//! `saturating_add`) — every failure is a typed +//! [`parser::ParseError`]; the parser is **total**. +//! * The `proptest` panic-free invariant (tests/parse.rs, 4096 cases x +//! 3 strategies) is the runtime witness that no input reaches a panic. +//! +//! Its [`ast`] is a one-to-one mirror of `src/core/Grammar.idr` (the +//! type the proof corpus certifies) so the P5b C-ABI marshaller is a +//! structural map. The parser establishes *syntax only*; type +//! resolution and the L0–L10 safety decision remain the Idris2 +//! certifier's job. + +#![forbid(unsafe_code)] +#![deny( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::indexing_slicing, + clippy::arithmetic_side_effects, + clippy::unreachable, + clippy::todo, + clippy::unimplemented, + clippy::exit +)] + +pub mod ast; +pub mod lexer; +pub mod parser; + +pub use ast::Statement; +pub use parser::{parse, ParseError}; + +#[cfg(test)] +mod tests { + // Test code may use the assertion/panic helpers the production + // lint-set forbids; the SPARK-grade posture binds the library only. + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::indexing_slicing, + clippy::arithmetic_side_effects + )] + use super::ast::*; + use super::parse; + + #[test] + fn minimal_select_from() { + let s = parse("SELECT * FROM STORE main").expect("valid"); + assert_eq!(s.select_items, vec![SelectItem::Star]); + assert_eq!(s.source, Source::Store("main".to_string())); + assert_eq!(s.requested_level, SafetyLevel::SchemaBound); + } + + #[test] + fn field_where_param_is_injection_safe_level() { + let s = parse("SELECT GRAPH.name FROM OCTAD x WHERE GRAPH.age > $1").expect("valid"); + assert_eq!( + s.select_items, + vec![SelectItem::Field(FieldRef { + modality: Modality::Graph, + field_name: "name".to_string(), + })] + ); + match s.where_clause { + Some(Expr::Compare(CompOp::Gt, _, _)) => {} + other => panic!("unexpected where: {other:?}"), + } + assert_eq!(s.requested_level, SafetyLevel::InjectionProof); + } + + #[test] + fn aggregate_and_clauses() { + let s = parse( + "SELECT COUNT(*) FROM HEXAD h \ + WHERE VECTOR.score >= 0.5 AND NOT DOCUMENT.deleted = TRUE \ + GROUP BY GRAPH.kind HAVING COUNT(*) > 3 \ + ORDER BY GRAPH.kind DESC LIMIT 10 OFFSET 5;", + ) + .expect("valid"); + assert_eq!(s.limit, Some(10)); + assert_eq!(s.offset, Some(5)); + assert_eq!(s.group_by.len(), 1); + assert!(s.having.is_some()); + assert_eq!(s.order_by.first().map(|(_, asc)| *asc), Some(false)); + // limit present, no higher feature -> CardinalitySafe. + assert_eq!(s.requested_level, SafetyLevel::CardinalitySafe); + } + + #[test] + fn subquery_in_where() { + let s = parse("SELECT * FROM STORE a WHERE GRAPH.id IN (SELECT GRAPH.id FROM STORE b)") + .expect("valid"); + match s.where_clause { + Some(Expr::Compare(CompOp::In, _, ref rhs)) => { + assert!(matches!(**rhs, Expr::Subquery(_))); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[test] + fn extension_clause_is_fail_closed_not_wrong() { + let e = parse("SELECT * FROM STORE a EFFECTS { Read }").unwrap_err(); + assert!(e.msg.contains("EFFECTS"), "got: {e}"); + assert!(e.msg.contains("fail-closed"), "got: {e}"); + } + + #[test] + fn garbage_is_an_error_never_a_panic() { + assert!(parse("").is_err()); + assert!(parse("SELECT").is_err()); + assert!(parse("SELECT * FROM").is_err()); + assert!(parse("')) DROP everything --").is_err()); + } +} diff --git a/src/interface/parse/src/parser.rs b/src/interface/parse/src/parser.rs new file mode 100644 index 0000000..0bce634 --- /dev/null +++ b/src/interface/parse/src/parser.rs @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! Total, panic-free recursive-descent parser for the VCL-total +//! relational core. Every path returns `Result`; cursor reads via +//! `slice::get` and advances via `saturating_add`. +//! +//! SCOPE (P5a first slice, #25): `SELECT … FROM … [WHERE] [GROUP BY] +//! [HAVING] [ORDER BY] [LIMIT] [OFFSET]` with the full expression +//! grammar (literals, params, `*`, fields, aggregates, comparisons, +//! AND/OR/NOT, parenthesised sub-expressions, scalar sub-queries). The +//! VCL-total extension clauses (PROOF / EFFECTS / AT VERSION / +//! CONSUME|USAGE / EPISTEMIC) and modal `KNOWS/BELIEVES/ANNOUNCE` +//! expressions are *fail-closed*: a typed `Unsupported` error, never a +//! silently-wrong AST. Coverage iterates per the #25 roadmap. + +use crate::ast::*; + +/// A parse failure: human message + 0-based char offset (from the +/// token's span, or `None` at end of input). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ParseError { + pub msg: String, + pub at: Option, +} + +impl core::fmt::Display for ParseError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.at { + Some(p) => write!(f, "parse error at char {p}: {}", self.msg), + None => write!(f, "parse error at end of input: {}", self.msg), + } + } +} +impl std::error::Error for ParseError {} + +use crate::lexer::{Spanned, Tok}; + +struct P { + toks: Vec, + pos: usize, +} + +impl P { + fn peek(&self) -> Option<&Tok> { + self.toks.get(self.pos).map(|s| &s.tok) + } + fn at(&self) -> Option { + self.toks.get(self.pos).map(|s| s.at) + } + fn bump(&mut self) { + self.pos = self.pos.saturating_add(1); + } + fn err(&self, msg: impl Into) -> Result { + Err(ParseError { + msg: msg.into(), + at: self.at(), + }) + } + fn unsupported(&self, what: &str) -> Result { + Err(ParseError { + msg: format!( + "{what} is not yet supported by the P5a parser slice \ + (fail-closed; tracked in #25)" + ), + at: self.at(), + }) + } + /// Case-insensitive keyword test on the current token. + fn is_kw(&self, kw: &str) -> bool { + matches!(self.peek(), Some(Tok::Word(w)) if w.eq_ignore_ascii_case(kw)) + } + /// Consume a specific keyword or error. + fn eat_kw(&mut self, kw: &str) -> Result<(), ParseError> { + if self.is_kw(kw) { + self.bump(); + Ok(()) + } else { + self.err(format!("expected keyword `{kw}`")) + } + } + fn is_sym(&self, s: &str) -> bool { + matches!(self.peek(), Some(Tok::Sym(x)) if *x == s) + } + fn eat_sym(&mut self, s: &str) -> Result<(), ParseError> { + if self.is_sym(s) { + self.bump(); + Ok(()) + } else { + self.err(format!("expected `{s}`")) + } + } +} + +/// Parse a complete VCL-total query string into a [`Statement`]. +pub fn parse(input: &str) -> Result { + let toks = crate::lexer::lex(input).map_err(|e| ParseError { + msg: e.msg, + at: Some(e.at), + })?; + let mut p = P { toks, pos: 0 }; + let stmt = parse_statement(&mut p)?; + if p.is_sym(";") { + p.bump(); + } + if p.peek().is_some() { + return p.err("trailing tokens after statement"); + } + Ok(stmt) +} + +fn parse_statement(p: &mut P) -> Result { + p.eat_kw("SELECT")?; + let select_items = parse_select_items(p)?; + p.eat_kw("FROM")?; + let source = parse_source(p)?; + + let mut where_clause = None; + if p.is_kw("WHERE") { + p.bump(); + where_clause = Some(parse_expr(p)?); + } + + let mut group_by = Vec::new(); + if p.is_kw("GROUP") { + p.bump(); + p.eat_kw("BY")?; + group_by = parse_field_list(p)?; + } + + let mut having = None; + if p.is_kw("HAVING") { + p.bump(); + having = Some(parse_expr(p)?); + } + + let mut order_by = Vec::new(); + if p.is_kw("ORDER") { + p.bump(); + p.eat_kw("BY")?; + order_by = parse_order_list(p)?; + } + + let mut limit = None; + if p.is_kw("LIMIT") { + p.bump(); + limit = Some(parse_u64(p)?); + } + let mut offset = None; + if p.is_kw("OFFSET") { + p.bump(); + offset = Some(parse_u64(p)?); + } + + // Fail-closed on the not-yet-covered extension clauses. + for kw in ["PROOF", "EFFECTS", "AT", "CONSUME", "USAGE", "EPISTEMIC"] { + if p.is_kw(kw) { + return p.unsupported(&format!("the `{kw}` clause")); + } + } + + let mut stmt = Statement { + select_items, + source, + where_clause, + group_by, + having, + order_by, + limit, + offset, + proof_clause: None, + effect_decl: None, + version_const: None, + linear_annot: None, + epistemic_clause: None, + requested_level: SafetyLevel::ParseSafe, + }; + stmt.requested_level = infer_requested_level(&stmt); + Ok(stmt) +} + +/// P5a requested-level policy (DOCUMENTED, conservative — NOT a proven +/// fact). The certifier checks *up to* this level; we pick the highest +/// feature-implied level present, with a relational base. Authoritative +/// reconciliation with the ReScript bridge's inference is a later #25 +/// iteration. Until then this is the parser's stated intent only. +fn infer_requested_level(s: &Statement) -> SafetyLevel { + if s.epistemic_clause.is_some() { + SafetyLevel::EpistemicSafe + } else if s.linear_annot.is_some() { + SafetyLevel::LinearSafe + } else if s.version_const.is_some() { + SafetyLevel::TemporalSafe + } else if s.effect_decl.is_some() { + SafetyLevel::EffectTracked + } else if s.limit.is_some() { + SafetyLevel::CardinalitySafe + } else if s.where_clause.is_some() || s.having.is_some() { + // A predicate present ⇒ at least injection-safety is wanted. + SafetyLevel::InjectionProof + } else { + SafetyLevel::SchemaBound + } +} + +fn parse_select_items(p: &mut P) -> Result, ParseError> { + let mut items = Vec::new(); + loop { + items.push(parse_select_item(p)?); + if p.is_sym(",") { + p.bump(); + continue; + } + break; + } + Ok(items) +} + +fn parse_select_item(p: &mut P) -> Result { + for kw in ["KNOWS", "BELIEVES", "ANNOUNCE", "COMMON"] { + if p.is_kw(kw) { + return p.unsupported("modal/epistemic expressions"); + } + } + if p.is_sym("*") { + p.bump(); + return Ok(SelectItem::Star); + } + if let Some(agg) = peek_agg(p) { + p.bump(); + p.eat_sym("(")?; + let e = if p.is_sym("*") { + p.bump(); + Expr::Star + } else { + parse_expr(p)? + }; + p.eat_sym(")")?; + return Ok(SelectItem::Aggregate(agg, e)); + } + if let Some(m) = peek_modality(p) { + // `MODALITY.field` => field item; bare `MODALITY` => whole modality. + p.bump(); + if p.is_sym(".") { + p.bump(); + let field_name = parse_ident(p)?; + return Ok(SelectItem::Field(FieldRef { + modality: m, + field_name, + })); + } + return Ok(SelectItem::Modality(m)); + } + p.err("expected a SELECT item (`*`, MODALITY[.field], or AGG(expr))") +} + +fn parse_source(p: &mut P) -> Result { + // `OCTAD`/`HEXAD `, `FEDERATION `, `STORE `. + // OCTAD/HEXAD both accepted (grammar comment vs constructor name + // disagree); reconciled with the ReScript bridge in a later slice. + if p.is_kw("OCTAD") || p.is_kw("HEXAD") { + p.bump(); + return Ok(Source::Octad(parse_source_arg(p)?)); + } + if p.is_kw("FEDERATION") { + p.bump(); + return Ok(Source::Federation(parse_source_arg(p)?)); + } + if p.is_kw("STORE") { + p.bump(); + return Ok(Source::Store(parse_source_arg(p)?)); + } + p.err("expected FROM source (OCTAD|HEXAD|FEDERATION|STORE )") +} + +fn parse_source_arg(p: &mut P) -> Result { + match p.peek() { + Some(Tok::Str(s)) => { + let v = s.clone(); + p.bump(); + Ok(v) + } + Some(Tok::Word(w)) => { + let v = w.clone(); + p.bump(); + Ok(v) + } + _ => p.err("expected a source identifier (word or 'string')"), + } +} + +fn parse_field_list(p: &mut P) -> Result, ParseError> { + let mut fields = Vec::new(); + loop { + fields.push(parse_field_ref(p)?); + if p.is_sym(",") { + p.bump(); + continue; + } + break; + } + Ok(fields) +} + +fn parse_order_list(p: &mut P) -> Result, ParseError> { + let mut out = Vec::new(); + loop { + let f = parse_field_ref(p)?; + let asc = if p.is_kw("DESC") { + p.bump(); + false + } else { + if p.is_kw("ASC") { + p.bump(); + } + true + }; + out.push((f, asc)); + if p.is_sym(",") { + p.bump(); + continue; + } + break; + } + Ok(out) +} + +fn parse_field_ref(p: &mut P) -> Result { + let m = peek_modality(p) + .ok_or(()) + .or_else(|()| p.err("expected MODALITY in field reference"))?; + p.bump(); + p.eat_sym(".")?; + let field_name = parse_ident(p)?; + Ok(FieldRef { + modality: m, + field_name, + }) +} + +// ── Expressions: OR < AND < NOT < comparison < primary ──────────────── + +fn parse_expr(p: &mut P) -> Result { + parse_or(p) +} + +fn parse_or(p: &mut P) -> Result { + let mut lhs = parse_and(p)?; + while p.is_kw("OR") { + p.bump(); + let rhs = parse_and(p)?; + lhs = Expr::Logic(LogicOp::Or, Box::new(lhs), Some(Box::new(rhs))); + } + Ok(lhs) +} + +fn parse_and(p: &mut P) -> Result { + let mut lhs = parse_not(p)?; + while p.is_kw("AND") { + p.bump(); + let rhs = parse_not(p)?; + lhs = Expr::Logic(LogicOp::And, Box::new(lhs), Some(Box::new(rhs))); + } + Ok(lhs) +} + +fn parse_not(p: &mut P) -> Result { + if p.is_kw("NOT") { + p.bump(); + let inner = parse_not(p)?; + return Ok(Expr::Logic(LogicOp::Not, Box::new(inner), None)); + } + parse_comparison(p) +} + +fn parse_comparison(p: &mut P) -> Result { + let lhs = parse_primary(p)?; + let op = match p.peek() { + Some(Tok::Sym("=")) => Some(CompOp::Eq), + Some(Tok::Sym("!=")) => Some(CompOp::NotEq), + Some(Tok::Sym("<")) => Some(CompOp::Lt), + Some(Tok::Sym(">")) => Some(CompOp::Gt), + Some(Tok::Sym("<=")) => Some(CompOp::LtEq), + Some(Tok::Sym(">=")) => Some(CompOp::GtEq), + Some(Tok::Word(w)) if w.eq_ignore_ascii_case("LIKE") => Some(CompOp::Like), + Some(Tok::Word(w)) if w.eq_ignore_ascii_case("IN") => Some(CompOp::In), + _ => None, + }; + match op { + None => Ok(lhs), + Some(o) => { + p.bump(); + let rhs = parse_primary(p)?; + Ok(Expr::Compare(o, Box::new(lhs), Box::new(rhs))) + } + } +} + +fn parse_primary(p: &mut P) -> Result { + // Modal expression nodes are fail-closed for the P5a slice. + for kw in ["KNOWS", "BELIEVES", "ANNOUNCE", "COMMON"] { + if p.is_kw(kw) { + return p.unsupported("modal/epistemic expressions"); + } + } + match p.peek() { + Some(Tok::Sym("*")) => { + p.bump(); + Ok(Expr::Star) + } + Some(Tok::Sym("(")) => { + p.bump(); + // `(SELECT …)` => scalar sub-query; else parenthesised expr. + if p.is_kw("SELECT") { + let sub = parse_statement(p)?; + p.eat_sym(")")?; + Ok(Expr::Subquery(Box::new(sub))) + } else { + let e = parse_expr(p)?; + p.eat_sym(")")?; + Ok(e) + } + } + Some(Tok::Str(s)) => { + let v = s.clone(); + p.bump(); + Ok(Expr::Literal(Literal::Str(v))) + } + Some(Tok::Int(n)) => { + let v = *n; + p.bump(); + Ok(Expr::Literal(Literal::Int(v))) + } + Some(Tok::Float(f)) => { + let v = *f; + p.bump(); + Ok(Expr::Literal(Literal::Float(v))) + } + Some(Tok::Param(name)) => { + let v = name.clone(); + p.bump(); + Ok(Expr::Param(v)) + } + Some(Tok::Word(w)) => { + // Keyword literals first. + if w.eq_ignore_ascii_case("TRUE") { + p.bump(); + return Ok(Expr::Literal(Literal::Bool(true))); + } + if w.eq_ignore_ascii_case("FALSE") { + p.bump(); + return Ok(Expr::Literal(Literal::Bool(false))); + } + if w.eq_ignore_ascii_case("NULL") { + p.bump(); + return Ok(Expr::Literal(Literal::Null)); + } + if let Some(agg) = peek_agg(p) { + p.bump(); + p.eat_sym("(")?; + let e = if p.is_sym("*") { + p.bump(); + Expr::Star + } else { + parse_expr(p)? + }; + p.eat_sym(")")?; + return Ok(Expr::Aggregate(agg, Box::new(e))); + } + if peek_modality(p).is_some() { + return Ok(Expr::Field(parse_field_ref(p)?)); + } + p.err(format!("unexpected word `{w}` in expression")) + } + _ => p.err("expected an expression"), + } +} + +// ── Token classifiers ───────────────────────────────────────────────── + +fn peek_modality(p: &P) -> Option { + if let Some(Tok::Word(w)) = p.peek() { + let u = w.to_ascii_uppercase(); + return match u.as_str() { + "GRAPH" => Some(Modality::Graph), + "VECTOR" => Some(Modality::Vector), + "TENSOR" => Some(Modality::Tensor), + "SEMANTIC" => Some(Modality::Semantic), + "DOCUMENT" => Some(Modality::Document), + "TEMPORAL" => Some(Modality::Temporal), + "PROVENANCE" => Some(Modality::Provenance), + "SPATIAL" => Some(Modality::Spatial), + _ => None, + }; + } + None +} + +fn peek_agg(p: &P) -> Option { + if let Some(Tok::Word(w)) = p.peek() { + let u = w.to_ascii_uppercase(); + return match u.as_str() { + "COUNT" => Some(AggFunc::Count), + "SUM" => Some(AggFunc::Sum), + "AVG" => Some(AggFunc::Avg), + "MIN" => Some(AggFunc::Min), + "MAX" => Some(AggFunc::Max), + _ => None, + }; + } + None +} + +fn parse_ident(p: &mut P) -> Result { + match p.peek() { + Some(Tok::Word(w)) => { + let v = w.clone(); + p.bump(); + Ok(v) + } + _ => p.err("expected an identifier"), + } +} + +fn parse_u64(p: &mut P) -> Result { + match p.peek() { + Some(Tok::Int(n)) if *n >= 0 => { + let v = *n as u64; + p.bump(); + Ok(v) + } + Some(Tok::Int(_)) => p.err("expected a non-negative integer"), + _ => p.err("expected an integer"), + } +} diff --git a/src/interface/parse/tests/parse.rs b/src/interface/parse/tests/parse.rs new file mode 100644 index 0000000..70a1862 --- /dev/null +++ b/src/interface/parse/tests/parse.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +//! Totality / panic-freedom property for the trusted parser (P5a, #25). +//! The SPARK-grade claim is "no input panics; every failure is a typed +//! `ParseError`". `proptest` fails the run if any input panics, so a +//! green run over these strategies is machine evidence of that. + +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +use proptest::prelude::*; +use vcltotal_parse::parse; + +proptest! { + #![proptest_config(ProptestConfig::with_cases(4096))] + + /// Arbitrary UTF-8: parsing must terminate with Ok/Err, never panic. + #[test] + fn never_panics_on_arbitrary_text(s in ".*") { + let _ = parse(&s); + } + + /// Arbitrary bytes interpreted lossily — still no panic. + #[test] + fn never_panics_on_arbitrary_bytes(bytes in proptest::collection::vec(any::(), 0..512)) { + let s = String::from_utf8_lossy(&bytes); + let _ = parse(&s); + } + + /// Token-salad from the real lexical alphabet: stresses the + /// recursive-descent paths without panicking. + #[test] + fn never_panics_on_token_salad( + toks in proptest::collection::vec( + prop_oneof![ + Just("SELECT"), Just("FROM"), Just("WHERE"), Just("STORE"), + Just("OCTAD"), Just("GRAPH.x"), Just("VECTOR.y"), Just("AND"), + Just("OR"), Just("NOT"), Just("COUNT"), Just("("), Just(")"), + Just("*"), Just(","), Just("="), Just(">="), Just("$1"), + Just("'lit'"), Just("42"), Just("3.14"), Just(";"), + Just("LIMIT"), Just("GROUP"), Just("BY"), Just("EFFECTS"), + ], + 0..40, + ) + ) { + let s = toks.join(" "); + let _ = parse(&s); + } +} + +#[test] +fn known_good_queries_parse() { + let oks = [ + "SELECT * FROM STORE main", + "SELECT GRAPH.a, VECTOR.b FROM OCTAD u WHERE GRAPH.a = $1", + "SELECT COUNT(*) FROM HEXAD h GROUP BY GRAPH.k HAVING COUNT(*) > 2", + "SELECT * FROM STORE s WHERE GRAPH.id IN (SELECT GRAPH.id FROM STORE t);", + "SELECT SEMANTIC FROM FEDERATION 'pat-*' ORDER BY SEMANTIC.score DESC LIMIT 7", + ]; + for q in oks { + assert!(parse(q).is_ok(), "should parse: {q}"); + } +} + +#[test] +fn fail_closed_on_unsupported_extensions() { + // Honest: a typed error, never a silently-wrong AST. + for q in [ + "SELECT * FROM STORE s PROOF ATTACHED", + "SELECT * FROM STORE s AT VERSION = 3", + "SELECT * FROM STORE s EPISTEMIC { AGENTS ENGINE }", + "SELECT KNOWS ENGINE GRAPH.x FROM STORE s", + ] { + let err = parse(q).expect_err("must fail-closed"); + assert!( + err.msg.contains("not yet supported") || err.msg.contains("fail-closed"), + "expected fail-closed error for {q:?}, got {err}" + ); + } +} From 2ecb6604df4946680e75fb6eb307453840e83108 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 19 May 2026 14:53:23 +0100 Subject: [PATCH 2/3] vcl-ut #25 P5a: lock vcltotal-parse deps (proptest) for deterministic CI Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 4ed0056..d857b00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1922,6 +1922,13 @@ dependencies = [ "vcltotal-interface", ] +[[package]] +name = "vcltotal-parse" +version = "0.1.0" +dependencies = [ + "proptest", +] + [[package]] name = "wait-timeout" version = "0.2.1" From 598df328658c6604c5b6e6a2bbb7582ba0664d61 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 19 May 2026 15:59:39 +0100 Subject: [PATCH 3/3] vcl-ut #25 P5a: make vcltotal-parse a standalone workspace (CI isolation) CI revealed the parent virtual workspace cannot load in a standalone checkout: the pre-existing src/interface member path-deps on an out-of-repo sibling (echidna-core = ../../../echidna/...), which only resolves on the estate layout. Any whole-workspace cargo op (incl. the prior parse-gate, and the repos other Rust jobs) fails fast in CI. Decouple the trusted boundary parser: it is now its own [workspace] root with its own Cargo.lock; parse-gate runs via --manifest-path --locked, never traversing the broken parent. Remove it from the root members. P5a code unchanged; verified standalone: clippy --locked -D warnings clean, 11/11 tests, fmt clean. Root Cargo.lock reverted to main (the earlier change is moot now parse is not a member). The parent-workspace external-path-dep breakage is pre-existing and out of #25 scope; reported separately. Refs hyperpolymath/vcl-ut#25. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/parse-gate.yml | 10 +- Cargo.lock | 7 - Cargo.toml | 10 +- src/interface/parse/Cargo.lock | 627 +++++++++++++++++++++++++++++++ src/interface/parse/Cargo.toml | 8 + 5 files changed, 652 insertions(+), 10 deletions(-) create mode 100644 src/interface/parse/Cargo.lock diff --git a/.github/workflows/parse-gate.yml b/.github/workflows/parse-gate.yml index 44881f9..032a94a 100644 --- a/.github/workflows/parse-gate.yml +++ b/.github/workflows/parse-gate.yml @@ -42,12 +42,18 @@ jobs: rustup component add clippy cargo --version && cargo clippy --version + # `vcltotal-parse` is its own workspace root (see its Cargo.toml). + # Operate via its manifest path so CI never traverses the parent + # virtual workspace (whose pre-existing `src/interface` member has + # an external path-dep that does not resolve in a standalone + # checkout). Self-contained: no external path-deps here. - name: Clippy — SPARK-grade lint set as a hard gate run: | set -euo pipefail - cargo clippy -p vcltotal-parse --all-targets -- -D warnings + cargo clippy --manifest-path src/interface/parse/Cargo.toml \ + --all-targets --locked -- -D warnings - name: Tests + panic-free proptest invariant run: | set -euo pipefail - cargo test -p vcltotal-parse + cargo test --manifest-path src/interface/parse/Cargo.toml --locked diff --git a/Cargo.lock b/Cargo.lock index d857b00..4ed0056 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1922,13 +1922,6 @@ dependencies = [ "vcltotal-interface", ] -[[package]] -name = "vcltotal-parse" -version = "0.1.0" -dependencies = [ - "proptest", -] - [[package]] name = "wait-timeout" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index a847712..03398c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,5 +33,13 @@ members = [ "src/interface/lint", "src/interface/lsp", "src/interface/dap", - "src/interface/parse", ] +# NOTE: `src/interface/parse` (vcltotal-parse, #25 P5a) is deliberately +# NOT a member here. The pre-existing `src/interface` member carries an +# external path-dependency (`echidna-core = { path = +# "../../../echidna/..." }`) that only resolves on the estate's +# sibling-repo layout, so the whole virtual workspace fails to load in a +# standalone checkout (e.g. CI). The trusted boundary parser must build +# and gate in isolation, so it is its own workspace root (see +# src/interface/parse/Cargo.toml `[workspace]`). Tracked separately: +# the parent-workspace external-path-dep breakage is NOT a #25 concern. diff --git a/src/interface/parse/Cargo.lock b/src/interface/parse/Cargo.lock new file mode 100644 index 0000000..0d2604c --- /dev/null +++ b/src/interface/parse/Cargo.lock @@ -0,0 +1,627 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "vcltotal-parse" +version = "0.1.0" +dependencies = [ + "proptest", +] + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/src/interface/parse/Cargo.toml b/src/interface/parse/Cargo.toml index 7286666..196a91a 100644 --- a/src/interface/parse/Cargo.toml +++ b/src/interface/parse/Cargo.toml @@ -1,4 +1,12 @@ # SPDX-License-Identifier: PMPL-1.0-or-later + +# Standalone workspace root: decouples this trusted boundary crate from +# the parent virtual workspace, whose pre-existing `src/interface` +# member has an external path-dep (`../../../echidna/...`) that does not +# resolve in a standalone checkout (CI). This crate has NO external +# path-deps and must build/gate in isolation. +[workspace] + [package] name = "vcltotal-parse" version = "0.1.0"