Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 249 additions & 42 deletions src/codegen/overlay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,62 @@ fn must_validate_identifier(name: &str) -> &str {
}
}

// ---------------------------------------------------------------------------
// SQL dialect (V-L2-F1, #45)
// ---------------------------------------------------------------------------

/// The SQL dialect the sidecar DDL is emitted for. Selected from the
/// manifest's `[sidecar].storage`. The table bodies are written in the
/// portable subset both engines accept (`CREATE TABLE IF NOT EXISTS`,
/// `CHECK`, partial unique indexes, `CURRENT_TIMESTAMP`); the only
/// genuinely dialect-divergent fragment is the metadata upsert
/// (`INSERT OR IGNORE` vs `INSERT … ON CONFLICT DO NOTHING`), which lives
/// in the [`sqlite`] / [`postgres`] modules.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SqlDialect {
Sqlite,
Postgres,
}

impl SqlDialect {
/// Map a `[sidecar].storage` value to a dialect. `sqlite` →
/// [`SqlDialect::Sqlite`]; `postgres`/`postgresql` →
/// [`SqlDialect::Postgres`]. `json` and unknown values are rejected
/// (the previous behaviour silently emitted SQLite DDL regardless,
/// V-L2-F1). The JSON store is tracked separately by #112.
pub fn from_storage(storage: &str) -> anyhow::Result<Self> {
match storage.to_lowercase().as_str() {
"sqlite" => Ok(SqlDialect::Sqlite),
"postgres" | "postgresql" => Ok(SqlDialect::Postgres),
"json" => anyhow::bail!(
"[sidecar].storage = \"json\" is not implemented (it previously \
emitted SQLite DDL silently). Use \"sqlite\". The JSON sidecar \
store is tracked by hyperpolymath/verisimiser#112."
),
other => anyhow::bail!(
"unknown [sidecar].storage {other:?}; supported: \"sqlite\" \
(\"postgres\" for a PostgreSQL sidecar; \"json\" is #112)."
),
}
}
}

// ---------------------------------------------------------------------------
// Overlay generation
// ---------------------------------------------------------------------------

/// Generate the complete sidecar schema DDL for all enabled octad dimensions.
/// Generate the complete sidecar schema DDL for all enabled octad
/// dimensions, in the requested SQL `dialect`.
///
/// The generated SQL is SQLite-compatible (the default sidecar backend).
/// Each table is created with `IF NOT EXISTS` so the schema can be applied
/// idempotently.
/// idempotently. Dispatches to [`sqlite::generate`] / [`postgres::generate`].
///
/// # Arguments
/// * `schema` - The parsed schema of the target database (used to reference
/// table names in provenance and temporal tracking).
/// * `octad` - The octad configuration from the manifest, controlling which
/// dimension tables to generate.
/// * `dialect` - The sidecar SQL dialect (see [`SqlDialect::from_storage`]).
///
/// # Returns
/// `Ok(String)` with the complete DDL on success. `Err` if any table or
Expand All @@ -87,6 +128,21 @@ fn must_validate_identifier(name: &str) -> &str {
pub fn generate_sidecar_schema(
schema: &ParsedSchema,
octad: &OctadConfig,
dialect: SqlDialect,
) -> anyhow::Result<String> {
match dialect {
SqlDialect::Sqlite => sqlite::generate(schema, octad),
SqlDialect::Postgres => postgres::generate(schema, octad),
}
}

/// Shared schema body: validate identifiers, then assemble every enabled
/// octad table. The metadata *seed* (the only dialect-divergent fragment)
/// is supplied by the caller via `seed`.
fn assemble(
schema: &ParsedSchema,
octad: &OctadConfig,
seed: impl Fn(&ParsedSchema) -> String,
) -> anyhow::Result<String> {
use crate::codegen::ident::validate_identifier;

Expand All @@ -105,7 +161,9 @@ pub fn generate_sidecar_schema(
ddl.push_str("-- Do not edit manually; regenerate with `verisimiser init`.\n\n");

// Metadata table: tracks which target tables are being augmented.
ddl.push_str(&generate_metadata_table(schema));
// The CREATE is portable; the seed upsert is dialect-specific.
ddl.push_str(&metadata_table_ddl());
ddl.push_str(&seed(schema));

if octad.enable_provenance {
ddl.push_str(&generate_provenance_table());
Expand All @@ -130,52 +188,103 @@ pub fn generate_sidecar_schema(
Ok(ddl)
}

/// Generate the metadata table that tracks which target tables are augmented.
/// The metadata table CREATE — portable across SQLite and PostgreSQL.
///
/// This table is always created regardless of octad configuration, because
/// the Data and Metadata dimensions are always active.
fn generate_metadata_table(schema: &ParsedSchema) -> String {
let mut ddl = String::new();
/// the Data and Metadata dimensions are always active. The per-table seed
/// rows are dialect-specific and emitted by [`sqlite`] / [`postgres`].
fn metadata_table_ddl() -> String {
"-- Metadata: tracks augmented target tables\n\
CREATE TABLE IF NOT EXISTS verisimdb_metadata (\n\
\x20 table_name TEXT PRIMARY KEY,\n\
\x20 column_count INTEGER NOT NULL,\n\
\x20 pk_columns TEXT NOT NULL, -- comma-separated list of PK column names\n\
\x20 discovered_at TEXT NOT NULL -- ISO 8601 timestamp\n\
);\n\n"
.to_string()
}

ddl.push_str("-- Metadata: tracks augmented target tables\n");
ddl.push_str(
"CREATE TABLE IF NOT EXISTS verisimdb_metadata (\n\
\x20 table_name TEXT PRIMARY KEY,\n\
\x20 column_count INTEGER NOT NULL,\n\
\x20 pk_columns TEXT NOT NULL, -- comma-separated list of PK column names\n\
\x20 discovered_at TEXT NOT NULL -- ISO 8601 timestamp\n\
);\n\n",
);

// Generate INSERT statements for each discovered table.
//
// V-L2-G1: every identifier flowing into the SQL string here is
// validated. Anything that wouldn't match `^[A-Za-z_][A-Za-z0-9_]*$`
// is rejected at codegen time rather than allowed to land in DDL
// (where it would be an injection vector).
if !schema.tables.is_empty() {
ddl.push_str("-- Seed metadata from parsed schema\n");
for table in &schema.tables {
let table_name = must_validate_identifier(&table.name);
let pk_cols: Vec<&str> = table
/// Per-table seed values, shared by both dialects. Returns
/// `(validated_table_name, column_count, validated_pk_csv)`.
///
/// V-L2-G1: every identifier flowing into the SQL string here is
/// validated. Anything that wouldn't match `^[A-Za-z_][A-Za-z0-9_]*$`
/// is rejected at codegen time rather than allowed to land in DDL
/// (where it would be an injection vector).
fn metadata_rows(schema: &ParsedSchema) -> Vec<(&str, usize, String)> {
schema
.tables
.iter()
.map(|table| {
let name = must_validate_identifier(&table.name);
let pk_csv = table
.columns
.iter()
.filter(|c| c.is_primary_key)
.map(|c| must_validate_identifier(c.name.as_str()))
.collect();
let pk_str = pk_cols.join(",");
.collect::<Vec<_>>()
.join(",");
(name, table.columns.len(), pk_csv)
})
.collect()
}

/// SQLite-specific sidecar DDL emission (V-L2-F1, #45).
pub mod sqlite {
use super::*;

/// SQLite metadata seed: `INSERT OR IGNORE` + portable
/// `CURRENT_TIMESTAMP` (was the SQLite-only `datetime('now')`).
pub(super) fn metadata_seed(schema: &ParsedSchema) -> String {
if schema.tables.is_empty() {
return String::new();
}
let mut ddl = String::from("-- Seed metadata from parsed schema (SQLite)\n");
for (name, ncols, pk_csv) in metadata_rows(schema) {
ddl.push_str(&format!(
"INSERT OR IGNORE INTO verisimdb_metadata (table_name, column_count, pk_columns, discovered_at)\n\
\x20 VALUES ('{}', {}, '{}', datetime('now'));\n",
table_name,
table.columns.len(),
pk_str,
\x20 VALUES ('{}', {}, '{}', CURRENT_TIMESTAMP);\n",
name, ncols, pk_csv,
));
}
ddl.push('\n');
ddl
}

/// Generate the full SQLite sidecar schema.
pub fn generate(schema: &ParsedSchema, octad: &OctadConfig) -> anyhow::Result<String> {
assemble(schema, octad, metadata_seed)
}
}

/// PostgreSQL-specific sidecar DDL emission (V-L2-F1, #45).
pub mod postgres {
use super::*;

/// PostgreSQL metadata seed: `INSERT … ON CONFLICT DO NOTHING`
/// (SQLite's `INSERT OR IGNORE` is not valid PostgreSQL) + portable
/// `CURRENT_TIMESTAMP`.
pub(super) fn metadata_seed(schema: &ParsedSchema) -> String {
if schema.tables.is_empty() {
return String::new();
}
let mut ddl = String::from("-- Seed metadata from parsed schema (PostgreSQL)\n");
for (name, ncols, pk_csv) in metadata_rows(schema) {
ddl.push_str(&format!(
"INSERT INTO verisimdb_metadata (table_name, column_count, pk_columns, discovered_at)\n\
\x20 VALUES ('{}', {}, '{}', CURRENT_TIMESTAMP)\n\
\x20 ON CONFLICT (table_name) DO NOTHING;\n",
name, ncols, pk_csv,
));
}
ddl.push('\n');
ddl
}

ddl
/// Generate the full PostgreSQL sidecar schema.
pub fn generate(schema: &ParsedSchema, octad: &OctadConfig) -> anyhow::Result<String> {
assemble(schema, octad, metadata_seed)
}
}

/// Generate the provenance log table for the Provenance dimension.
Expand Down Expand Up @@ -380,7 +489,8 @@ mod tests {
enable_constraints: true,
enable_simulation: true,
};
let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate");
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("test schema must validate");

assert!(ddl.contains("verisimdb_metadata"));
assert!(ddl.contains("verisimdb_provenance_log"));
Expand All @@ -404,7 +514,8 @@ mod tests {
enable_constraints: true,
enable_simulation: true,
};
let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate");
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("test schema must validate");

// Self-referencing FK on parent_branch.
assert!(
Expand Down Expand Up @@ -447,7 +558,8 @@ mod tests {
enable_constraints: false,
enable_simulation: false,
};
let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate");
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("test schema must validate");
assert!(ddl.contains("verisimdb_temporal_versions"));
assert!(
ddl.contains("CREATE UNIQUE INDEX IF NOT EXISTS ux_temporal_current"),
Expand Down Expand Up @@ -478,7 +590,8 @@ mod tests {
enable_constraints: false,
enable_simulation: false,
};
let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate");
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("test schema must validate");
assert!(ddl.contains("verisimdb_lineage_graph"));
// The exact CHECK clause must be present in the emitted DDL.
assert!(
Expand All @@ -500,7 +613,8 @@ mod tests {
enable_constraints: false,
enable_simulation: false,
};
let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate");
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("test schema must validate");

// Metadata is always generated.
assert!(ddl.contains("verisimdb_metadata"));
Expand All @@ -516,7 +630,8 @@ mod tests {
fn test_metadata_seeds_table_info() {
let schema = test_schema();
let octad = OctadConfig::default();
let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate");
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("test schema must validate");

assert!(ddl.contains("INSERT OR IGNORE INTO verisimdb_metadata"));
assert!(ddl.contains("'posts'"));
Expand Down Expand Up @@ -642,4 +757,96 @@ mod tests {
);
}
}

// --- #45 acceptance: per-dialect DDL + storage mapping ---

#[test]
fn test_sqlite_dialect_seed_and_portable_timestamp() {
let schema = test_schema();
let octad = OctadConfig::default();
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite)
.expect("sqlite ddl");
assert!(ddl.contains("INSERT OR IGNORE INTO verisimdb_metadata"));
assert!(
ddl.contains("CURRENT_TIMESTAMP"),
"portable timestamp must replace datetime(now)"
);
assert!(
!ddl.contains("datetime('now')"),
"the SQLite-only datetime('now') must be gone (V-L2-F1)"
);
assert!(ddl.contains("'posts'") && ddl.contains("verisimdb_provenance_log"));
}

#[test]
fn test_postgres_dialect_uses_on_conflict_not_or_ignore() {
let schema = test_schema();
let octad = OctadConfig::default();
let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Postgres)
.expect("postgres ddl");
assert!(
ddl.contains("ON CONFLICT (table_name) DO NOTHING"),
"postgres metadata upsert must use ON CONFLICT"
);
assert!(
!ddl.contains("INSERT OR IGNORE"),
"INSERT OR IGNORE is not valid PostgreSQL"
);
assert!(ddl.contains("CURRENT_TIMESTAMP") && !ddl.contains("datetime('now')"));
assert!(ddl.contains("verisimdb_metadata") && ddl.contains("'posts'"));
}

#[test]
fn test_both_dialects_share_the_octad_table_bodies() {
let schema = test_schema();
let octad = OctadConfig::default();
let s = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite).unwrap();
let p = generate_sidecar_schema(&schema, &octad, SqlDialect::Postgres).unwrap();
for table in [
"verisimdb_provenance_log",
"verisimdb_lineage_graph",
"verisimdb_temporal_versions",
"verisimdb_access_policies",
] {
assert!(s.contains(table), "sqlite missing {table}");
assert!(p.contains(table), "postgres missing {table}");
}
}

#[test]
fn test_empty_schema_emits_no_seed_in_either_dialect() {
let schema = ParsedSchema {
tables: vec![],
source: None,
};
let octad = OctadConfig::default();
let s = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite).unwrap();
let p = generate_sidecar_schema(&schema, &octad, SqlDialect::Postgres).unwrap();
assert!(!s.contains("INSERT OR IGNORE") && s.contains("verisimdb_metadata"));
assert!(!s.contains("Seed metadata from parsed schema"));
assert!(!p.contains("ON CONFLICT") && p.contains("verisimdb_metadata"));
assert!(!p.contains("Seed metadata from parsed schema"));
}

#[test]
fn test_storage_to_dialect_mapping() {
assert_eq!(
SqlDialect::from_storage("sqlite").unwrap(),
SqlDialect::Sqlite
);
assert_eq!(
SqlDialect::from_storage("postgres").unwrap(),
SqlDialect::Postgres
);
assert_eq!(
SqlDialect::from_storage("PostgreSQL").unwrap(),
SqlDialect::Postgres
);
let json_err = SqlDialect::from_storage("json").unwrap_err().to_string();
assert!(
json_err.contains("not implemented") && json_err.contains("#112"),
"json must be rejected with the #112 pointer, got: {json_err}"
);
assert!(SqlDialect::from_storage("mariadb").is_err());
}
}
Loading
Loading