From ba6f5dc8bc46b454ea96e0a4456aced6c5e34063 Mon Sep 17 00:00:00 2001 From: hyperpolymath Date: Sun, 17 May 2026 05:46:56 +0100 Subject: [PATCH] feat(logging): tracing diagnostics with --log-format/--log-level (#51) Add tracing + tracing-subscriber. Global --log-format=pretty|json and --log-level flags (precedence --log-level > RUST_LOG > info). The subscriber writes to stderr so the stdout data contract is never contaminated: JSON reports, the version string, the octad table, and the status/gc/validate report bodies stay on stdout verbatim. Only genuine diagnostics (schema parsing, file-write progress, "checking drift", "created manifest") become tracing events. JSON log schema documented in docs/logging.adoc + docs/logging.schema.json. New acceptance suite tests/logging_test.rs (6 tests): stdout purity, JSON-line schema validity, level filtering, RUST_LOG precedence. Closes #51. Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 150 +++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 + docs/logging.adoc | 53 ++++++++++++++ docs/logging.schema.json | 37 ++++++++++ src/main.rs | 59 ++++++++++++--- src/manifest/mod.rs | 2 +- tests/logging_test.rs | 146 +++++++++++++++++++++++++++++++++++++ 7 files changed, 439 insertions(+), 11 deletions(-) create mode 100644 docs/logging.adoc create mode 100644 docs/logging.schema.json create mode 100644 tests/logging_test.rs diff --git a/Cargo.lock b/Cargo.lock index 9a715d9..a6c39ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -422,6 +431,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -457,12 +472,30 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -484,6 +517,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "pkg-config" version = "0.3.33" @@ -602,6 +641,17 @@ dependencies = [ "rand_core", ] +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + [[package]] name = "regex-syntax" version = "0.8.10" @@ -722,6 +772,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -784,6 +843,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "toml" version = "0.8.23" @@ -825,6 +893,80 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + [[package]] name = "typenum" version = "1.19.0" @@ -855,6 +997,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -876,6 +1024,8 @@ dependencies = [ "tempfile", "thiserror", "toml", + "tracing", + "tracing-subscriber", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a655619..8443752 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ thiserror = "2" chrono = { version = "0.4", features = ["serde"] } sha2 = "0.10" rusqlite = { version = "0.32", features = ["bundled", "hooks"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } [build-dependencies] chrono = "0.4" @@ -29,3 +31,4 @@ chrono = "0.4" [dev-dependencies] tempfile = "3" proptest = "1" +serde_json = "1" diff --git a/docs/logging.adoc b/docs/logging.adoc new file mode 100644 index 0000000..5b0b7f2 --- /dev/null +++ b/docs/logging.adoc @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later += VeriSimiser diagnostic logging +:revdate: 2026-05-17 + +VeriSimiser separates two output streams: + +stdout (data):: +The answer to the command: JSON reports (`status --json`, `gc --json`, +`validate --json`, `doctor --json`, `version --json`), the human-readable +`status` / `gc` / `validate` report bodies, the `octad` table, and the +`version` string. This contract is stable and machine-parseable; logging +configuration never alters it. + +stderr (diagnostics):: +Progress and status messages emitted via `tracing` (e.g. "parsing +schema", "generated sidecar schema", "checking temporal drift", +"created manifest"). Safe to discard; never parse stdout-bound data +out of this stream. + +== Flags + +`--log-format=pretty|json`:: diagnostic rendering. Default `pretty` +(human-readable, coloured). `json` emits one JSON object per line. +`--log-level=trace|debug|info|warn|error`:: minimum level. Precedence: +`--log-level` > `RUST_LOG` > `info`. `RUST_LOG` accepts the full +`tracing-subscriber` `EnvFilter` syntax (e.g. `RUST_LOG=verisimiser=debug`). + +Both flags are global: they may appear before or after the subcommand. + +== JSON log schema + +With `--log-format=json`, each diagnostic line is a JSON object produced by +`tracing-subscriber`'s JSON formatter. A machine-readable JSON Schema is at +`docs/logging.schema.json`. Shape: + +[source,json] +---- +{ + "timestamp": "2026-05-17T12:34:56.789012Z", + "level": "INFO", + "target": "verisimiser", + "fields": { "message": "generated sidecar schema", + "path": ".verisim/sidecar_schema.sql" }, + "spans": [] +} +---- + +`timestamp` (RFC 3339), `level` (`TRACE|DEBUG|INFO|WARN|ERROR`), `target` +(emitting module), and `fields` (always contains `message`; structured +key/values such as `path`, `schema`, `threshold`, `tables` are added per +event) are always present. `spans` is present when emitted inside a span. +Stable contract: consumers may rely on `timestamp`, `level`, `target`, and +`fields.message`. diff --git a/docs/logging.schema.json b/docs/logging.schema.json new file mode 100644 index 0000000..e19a8fb --- /dev/null +++ b/docs/logging.schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/hyperpolymath/verisimiser/docs/logging.schema.json", + "title": "VeriSimiser JSON diagnostic log line", + "description": "One line of `--log-format=json` output (stderr). stdout data output is NOT covered by this schema.", + "type": "object", + "required": ["timestamp", "level", "target", "fields"], + "properties": { + "timestamp": { + "type": "string", + "format": "date-time", + "description": "RFC 3339 / ISO 8601 event time." + }, + "level": { + "type": "string", + "enum": ["TRACE", "DEBUG", "INFO", "WARN", "ERROR"] + }, + "target": { + "type": "string", + "description": "Emitting module path, e.g. \"verisimiser\"." + }, + "fields": { + "type": "object", + "required": ["message"], + "properties": { + "message": { "type": "string" } + }, + "additionalProperties": true, + "description": "Always carries `message`; structured key/values (path, schema, threshold, tables, …) are added per event." + }, + "spans": { + "type": "array", + "items": { "type": "object" } + } + }, + "additionalProperties": true +} diff --git a/src/main.rs b/src/main.rs index 9733ca8..ed38fcb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,9 +16,21 @@ // octad — Show the 8 octad dimensions use anyhow::Result; -use clap::{Parser, Subcommand}; +use clap::{Parser, Subcommand, ValueEnum}; +use tracing_subscriber::EnvFilter; use verisimiser::{abi, codegen, doctor, gc, manifest, tier1}; +/// Diagnostic-log rendering. Data output (reports, version, the octad +/// table) is always written verbatim to stdout regardless of this; this +/// only controls the `tracing` diagnostic stream, which goes to stderr. +#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)] +enum LogFormat { + /// Human-readable, ANSI-coloured single lines. + Pretty, + /// One JSON object per line (see docs/logging.adoc for the schema). + Json, +} + /// Long version string: ` (, built )`. const LONG_VERSION: &str = concat!( env!("CARGO_PKG_VERSION"), @@ -33,10 +45,36 @@ const LONG_VERSION: &str = concat!( #[derive(Parser)] #[command(name = "verisimiser", version = LONG_VERSION, about, long_about = None)] struct Cli { + /// Diagnostic-log format. Diagnostics go to stderr; command data + /// output always stays on stdout. + #[arg(long, value_enum, default_value_t = LogFormat::Pretty, global = true)] + log_format: LogFormat, + /// Diagnostic-log level: trace|debug|info|warn|error. Overrides + /// `RUST_LOG`. If neither is set, defaults to `info`. + #[arg(long, global = true)] + log_level: Option, #[command(subcommand)] command: Commands, } +/// Install the global `tracing` subscriber. Writes to **stderr** so the +/// diagnostic stream never contaminates the stdout data contract +/// (JSON reports, version strings, the octad table). Precedence for the +/// level filter: `--log-level` > `RUST_LOG` > `info`. +fn init_tracing(format: LogFormat, level: Option<&str>) { + let filter = match level { + Some(l) => EnvFilter::try_new(l).unwrap_or_else(|_| EnvFilter::new("info")), + None => EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")), + }; + let builder = tracing_subscriber::fmt() + .with_env_filter(filter) + .with_writer(std::io::stderr); + match format { + LogFormat::Json => builder.json().init(), + LogFormat::Pretty => builder.init(), + } +} + #[derive(Subcommand)] enum Commands { /// Initialise a verisimiser.toml manifest. @@ -140,6 +178,7 @@ enum Commands { fn main() -> Result<()> { let cli = Cli::parse(); + init_tracing(cli.log_format, cli.log_level.as_deref()); match cli.command { Commands::Init { database, @@ -152,10 +191,10 @@ fn main() -> Result<()> { // Determine schema source: from manifest or auto-detect. let schema = if let Some(ref schema_path) = m.database.schema_source { - println!("Parsing schema from: {}", schema_path); + tracing::info!(schema = %schema_path, "parsing schema"); codegen::parser::parse_schema_file(schema_path)? } else { - println!("No schema-source specified; generating empty overlay."); + tracing::warn!("no schema-source specified; generating empty overlay"); codegen::parser::ParsedSchema { tables: Vec::new(), source: None, @@ -176,19 +215,19 @@ fn main() -> Result<()> { let overlay_ddl = codegen::overlay::generate_sidecar_schema(&schema, &m.octad)?; let overlay_path = format!("{}/sidecar_schema.sql", output); std::fs::write(&overlay_path, &overlay_ddl)?; - println!("Generated sidecar schema: {}", overlay_path); + tracing::info!(path = %overlay_path, "generated sidecar schema"); // Generate query interceptors. let interceptors = codegen::query::generate_interceptors(&schema, &m.octad, backend); let interceptor_sql = codegen::query::render_interceptors(&interceptors); let interceptor_path = format!("{}/interceptors.sql", output); std::fs::write(&interceptor_path, &interceptor_sql)?; - println!("Generated query interceptors: {}", interceptor_path); + tracing::info!(path = %interceptor_path, "generated query interceptors"); - println!( - "\nGeneration complete. {} table(s) processed, {}/8 octad dimensions enabled.", - schema.tables.len(), - m.octad.enabled_count() + tracing::info!( + tables = schema.tables.len(), + octad_enabled = m.octad.enabled_count(), + "generation complete" ); Ok(()) } @@ -228,7 +267,7 @@ fn main() -> Result<()> { .query_map([], |r| r.get::<_, String>(0))? .collect::>()?; - println!("Checking Temporal drift (threshold: {})...", threshold); + tracing::info!(threshold, "checking temporal drift"); let mut reported = 0usize; for entity in &entities { let Some(report) = tier1::drift::detect_temporal_drift(&conn, entity)? else { diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index 444ae1c..ead1b44 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -582,7 +582,7 @@ pub fn init_manifest(database: &str, name: Option<&str>, force: bool) -> Result< let template = render_manifest_template(database, name); std::fs::write(path, template)?; - println!("Created {} for {} backend", path, database); + tracing::info!(path, backend = database, "created manifest"); Ok(()) } diff --git a/tests/logging_test.rs b/tests/logging_test.rs new file mode 100644 index 0000000..fa84367 --- /dev/null +++ b/tests/logging_test.rs @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +// +// Acceptance suite for #51 (V-L3-I1): tracing is wired; the stdout data +// contract is never contaminated by diagnostics; `--log-format=json` +// validates against docs/logging.schema.json; `--log-level` and the +// `RUST_LOG` fallback work with the documented precedence. + +use std::process::Command; + +fn bin() -> Command { + Command::new(env!("CARGO_BIN_EXE_verisimiser")) +} + +/// `version --json` is data output: stdout must be exactly one JSON object, +/// with no diagnostic contamination, regardless of log format. +#[test] +fn version_json_data_stays_pure_on_stdout() { + let out = bin() + .args(["--log-format", "json", "version", "--json"]) + .output() + .expect("run verisimiser"); + assert!(out.status.success()); + let stdout = String::from_utf8(out.stdout).unwrap(); + let v: serde_json::Value = + serde_json::from_str(stdout.trim()).expect("stdout is a single clean JSON value"); + assert!(v.get("version").is_some(), "version field present: {v}"); +} + +/// The `octad` table is data output and must land on stdout even with +/// JSON diagnostics enabled. +#[test] +fn octad_table_is_data_on_stdout() { + let out = bin() + .args(["octad", "--log-format", "json"]) + .output() + .expect("run verisimiser"); + assert!(out.status.success()); + let stdout = String::from_utf8(out.stdout).unwrap(); + assert!( + stdout.contains("VeriSimDB Octad"), + "octad data on stdout, got: {stdout}" + ); +} + +/// `init` now produces *no* stdout (its old "Created …" line is a +/// diagnostic). With `--log-format=json` the diagnostic must appear on +/// stderr and validate against the documented schema. +#[test] +fn init_diagnostic_is_json_on_stderr_and_schema_valid() { + let dir = tempfile::tempdir().unwrap(); + let out = bin() + .current_dir(dir.path()) + .args(["--log-format", "json", "--log-level", "info", "init"]) + .output() + .expect("run verisimiser init"); + assert!(out.status.success(), "init exits 0"); + + // Data contract: init has no stdout payload. + let stdout = String::from_utf8(out.stdout).unwrap(); + assert!( + stdout.trim().is_empty(), + "init must not write to stdout, got: {stdout:?}" + ); + + // Diagnostic: a JSON line on stderr matching docs/logging.schema.json. + let stderr = String::from_utf8(out.stderr).unwrap(); + let line = stderr + .lines() + .find(|l| l.contains("created manifest")) + .unwrap_or_else(|| panic!("expected a 'created manifest' log line, got: {stderr}")); + let log: serde_json::Value = + serde_json::from_str(line).expect("each diagnostic line is valid JSON"); + + // Documented stable fields. + assert!(log["timestamp"].is_string(), "timestamp present"); + let level = log["level"].as_str().expect("level is a string"); + assert!( + ["TRACE", "DEBUG", "INFO", "WARN", "ERROR"].contains(&level), + "level in enum, got {level}" + ); + assert!(log["target"].is_string(), "target present"); + assert_eq!( + log["fields"]["message"], "created manifest", + "fields.message carries the event message" + ); + // The structured field on the event is preserved. + assert!( + log["fields"]["backend"].is_string(), + "structured backend field present: {log}" + ); +} + +/// `--log-level=error` filters out the info diagnostic; stdout still empty. +#[test] +fn log_level_filters_diagnostics() { + let dir = tempfile::tempdir().unwrap(); + let out = bin() + .current_dir(dir.path()) + .args(["--log-format", "json", "--log-level", "error", "init"]) + .output() + .expect("run verisimiser init"); + assert!(out.status.success()); + let stderr = String::from_utf8(out.stderr).unwrap(); + assert!( + !stderr.contains("created manifest"), + "info diagnostic suppressed at --log-level=error, got: {stderr}" + ); +} + +/// `--log-level` overrides `RUST_LOG` (documented precedence: +/// --log-level > RUST_LOG > info). +#[test] +fn log_level_flag_overrides_rust_log_env() { + let dir = tempfile::tempdir().unwrap(); + let out = bin() + .current_dir(dir.path()) + .env("RUST_LOG", "error") + .args(["--log-format", "json", "--log-level", "info", "init"]) + .output() + .expect("run verisimiser init"); + assert!(out.status.success()); + let stderr = String::from_utf8(out.stderr).unwrap(); + assert!( + stderr.contains("created manifest"), + "--log-level=info must override RUST_LOG=error, got: {stderr}" + ); +} + +/// With no flag and no `RUST_LOG`, the default level is `info`. +#[test] +fn default_level_is_info() { + let dir = tempfile::tempdir().unwrap(); + let out = bin() + .current_dir(dir.path()) + .env_remove("RUST_LOG") + .args(["--log-format", "json", "init"]) + .output() + .expect("run verisimiser init"); + assert!(out.status.success()); + let stderr = String::from_utf8(out.stderr).unwrap(); + assert!( + stderr.contains("created manifest"), + "info visible by default, got: {stderr}" + ); +}