From e979bba606737c8ce46d97fa495278cd6a51ed9e Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Tue, 9 Jun 2026 19:19:44 -0700 Subject: [PATCH 1/5] fix(mcp): preserve catalog metadata on ALTER TABLE RENAME (#59) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `reconcile_in` previously treated a rename as a delete + fresh stub: the disappeared table's catalog row was deleted, and the new table name got a bare stub with all prose fields (source_url, purpose, notes, load_tool, load_params, created_by, loaded_at) wiped. Fix: before the delete/stub loop, detect renames by matching disappeared catalog entries to new live tables via row count. A match is only accepted when BOTH sides are unambiguous (exactly one disappeared entry has that row count AND exactly one new table has it), preventing false-positive metadata assignment when multiple tables share a count (e.g. two empty tables). Entries with NULL row_count are excluded from matching. Matched entries get their table_name updated in place via a simple UPDATE — all other fields (including loaded_at and last_refreshed_at) are preserved untouched. Adds: - `rename_catalog_entry` helper (UPDATE SET table_name only) - Test: creates a table with full metadata, renames it, reconciles, asserts every field (source_url, purpose, notes, load_tool, row_count, loaded_at, last_refreshed_at) survived. Closes #59. --- hyperdb-mcp/src/table_catalog.rs | 111 ++++++++++++++++++++-- hyperdb-mcp/tests/table_catalog_tests.rs | 114 +++++++++++++++++++++++ 2 files changed, 218 insertions(+), 7 deletions(-) diff --git a/hyperdb-mcp/src/table_catalog.rs b/hyperdb-mcp/src/table_catalog.rs index 5a7928c..e69c87d 100644 --- a/hyperdb-mcp/src/table_catalog.rs +++ b/hyperdb-mcp/src/table_catalog.rs @@ -668,22 +668,119 @@ pub fn reconcile_in(engine: &Engine, target_db: Option<&str>) -> Result<(), McpE .collect(); let live_tables: std::collections::HashSet = tables.iter().cloned().collect(); - for entry in &catalog_entries { - if !live_tables.contains(&entry.table_name) { + // Detect renames: a catalog entry whose table disappeared + a live table + // with no catalog entry + matching row count ⇒ likely a RENAME rather + // than a DROP+CREATE. Rename the catalog row in place so all prose + // metadata (source_url, purpose, notes, etc.) is preserved. + let disappeared: Vec<&CatalogEntry> = catalog_entries + .iter() + .filter(|e| !live_tables.contains(&e.table_name)) + .collect(); + let new_tables: Vec<&String> = tables + .iter() + .filter(|t| !catalog_names.contains(*t)) + .collect(); + + // Build a set of renamed pairs by matching on row count. A match is only + // accepted when exactly one disappeared entry shares a row count with + // exactly one new table — ambiguous multi-matches fall through to the + // normal delete+stub path (safe: we lose metadata rather than mis-assign). + let mut renamed_old: std::collections::HashSet = std::collections::HashSet::new(); + let mut renamed_new: std::collections::HashSet = std::collections::HashSet::new(); + + if !disappeared.is_empty() && !new_tables.is_empty() { + // Collect row counts for new tables (cheap: one COUNT per table). + let new_with_counts: Vec<(&String, Option)> = new_tables + .iter() + .map(|t| (*t, row_count_of_in(engine, t, target_db).ok())) + .collect(); + + // For a match to be accepted, BOTH sides must be unambiguous: + // - exactly one disappeared entry has this row count, AND + // - exactly one new table has this row count. + // This prevents false-positive metadata assignment when multiple + // tables share the same row count (e.g. two empty tables). + // Additionally, require the row count to be `Some` — `None == None` + // would match entries that were never counted with tables whose + // count query failed, producing a meaningless coincidence. + for entry in &disappeared { + let Some(entry_rc) = entry.row_count else { + continue; + }; + // How many disappeared entries share this row count? + let disappeared_with_same_rc = disappeared + .iter() + .filter(|e| e.row_count == Some(entry_rc)) + .count(); + if disappeared_with_same_rc != 1 { + continue; + } + // How many new tables share this row count? + let candidates: Vec<&&String> = new_with_counts + .iter() + .filter(|(t, rc)| *rc == Some(entry_rc) && !renamed_new.contains(t.as_str())) + .map(|(t, _)| t) + .collect(); + if candidates.len() == 1 { + let new_name = candidates[0]; + if rename_catalog_entry(engine, &entry.table_name, new_name, target_db).is_ok() { + renamed_old.insert(entry.table_name.clone()); + renamed_new.insert((*new_name).clone()); + } + } + } + } + + // Delete remaining disappeared entries (those NOT matched as renames). + for entry in &disappeared { + if !renamed_old.contains(&entry.table_name) { delete_for_in(engine, &entry.table_name, target_db)?; } } - for table in &tables { - let row_count = row_count_of_in(engine, table, target_db).ok(); - if catalog_names.contains(table) { - refresh_row_count_in(engine, table, row_count, target_db)?; - } else { + // Stub remaining new tables (those NOT matched as rename targets). + for table in &new_tables { + if !renamed_new.contains(table.as_str()) { + let row_count = row_count_of_in(engine, table, target_db).ok(); upsert_stub_in( engine, table, "unknown", None, row_count, false, target_db, None, )?; } } + + // Refresh row counts for tables that already had catalog entries. + for table in &tables { + if catalog_names.contains(table) { + let row_count = row_count_of_in(engine, table, target_db).ok(); + refresh_row_count_in(engine, table, row_count, target_db)?; + } + } + Ok(()) +} + +/// Rename a catalog entry's `table_name` in place, preserving all metadata. +fn rename_catalog_entry( + engine: &Engine, + old_name: &str, + new_name: &str, + target_db: Option<&str>, +) -> Result<(), McpError> { + let Some(qualified) = qualified_catalog_in(engine, target_db) else { + return Ok(()); + }; + // Only update table_name — a rename is not a data refresh, so loaded_at + // and last_refreshed_at must stay anchored to the original load time. + let sql = format!( + "UPDATE {qualified} SET table_name = {new} WHERE table_name = {old}", + new = sql_literal(new_name), + old = sql_literal(old_name), + ); + engine.execute_command(&sql)?; + tracing::debug!( + old_name, + new_name, + "catalog entry renamed (metadata preserved)" + ); Ok(()) } diff --git a/hyperdb-mcp/tests/table_catalog_tests.rs b/hyperdb-mcp/tests/table_catalog_tests.rs index 176b132..75bd968 100644 --- a/hyperdb-mcp/tests/table_catalog_tests.rs +++ b/hyperdb-mcp/tests/table_catalog_tests.rs @@ -434,3 +434,117 @@ fn backfill_stubs_preexisting_tables_on_reopen() { "backfilled rows are tagged as unknown origin" ); } + +#[test] +fn rename_table_preserves_catalog_metadata() { + let tmp = TempDir::new().unwrap(); + let path_str = tmp + .path() + .join("workspace.hyper") + .to_str() + .unwrap() + .to_string(); + + let engine = Engine::new_no_daemon(Some(path_str.clone())).unwrap(); + + // Create a table and load data so it has a non-zero row count. + engine + .execute_command( + "CREATE TABLE \"persistent\".\"public\".population (id INT, name TEXT NOT NULL)", + ) + .unwrap(); + engine + .execute_command( + "INSERT INTO \"persistent\".\"public\".population VALUES (1, 'Earth'), (2, 'Mars'), (3, 'Venus')" + ) + .unwrap(); + + // Stub the catalog entry (simulates what the server does after ingest). + table_catalog::upsert_stub( + &engine, + "population", + "load_file", + Some("{\"path\":\"/tmp/population.csv\"}"), + Some(3), + false, + ) + .unwrap(); + + // Set prose metadata that should survive the rename. + table_catalog::set_metadata( + &engine, + "population", + &MetadataFields { + source_url: Some("https://example.com/population".into()), + purpose: Some("Test rename metadata preservation".into()), + notes: Some("Refresh: curl ...".into()), + ..Default::default() + }, + ) + .unwrap(); + + // Verify metadata is set before rename. + let before = table_catalog::get(&engine, "population").unwrap().unwrap(); + assert_eq!( + before.source_url.as_deref(), + Some("https://example.com/population") + ); + assert_eq!( + before.purpose.as_deref(), + Some("Test rename metadata preservation") + ); + assert_eq!(before.notes.as_deref(), Some("Refresh: curl ...")); + assert_eq!(before.load_tool.as_deref(), Some("load_file")); + assert_eq!(before.row_count, Some(3)); + + // Rename the table (same as a user running execute(sql=["ALTER TABLE..."])) + engine + .execute_command( + "ALTER TABLE \"persistent\".\"public\".population RENAME TO owid_population", + ) + .unwrap(); + + // Run reconcile (same as after_execute_catalog_update does post-execute). + table_catalog::reconcile(&engine).unwrap(); + + // The old name should be gone. + assert!( + table_catalog::get(&engine, "population").unwrap().is_none(), + "old table name should not exist in catalog after rename" + ); + + // The new name should exist with ALL the original metadata preserved. + let after = table_catalog::get(&engine, "owid_population") + .unwrap() + .expect("renamed table should have a catalog entry"); + assert_eq!( + after.source_url.as_deref(), + Some("https://example.com/population"), + "source_url must survive rename" + ); + assert_eq!( + after.purpose.as_deref(), + Some("Test rename metadata preservation"), + "purpose must survive rename" + ); + assert_eq!( + after.notes.as_deref(), + Some("Refresh: curl ..."), + "notes must survive rename" + ); + assert_eq!( + after.load_tool.as_deref(), + Some("load_file"), + "load_tool must survive rename" + ); + assert_eq!(after.row_count, Some(3), "row_count must survive rename"); + // Both timestamps should be preserved — a rename is not a refresh. + assert_eq!( + after.loaded_at, before.loaded_at, + "loaded_at should be anchored to the original load time, not the rename time" + ); + assert_eq!( + after.last_refreshed_at, before.last_refreshed_at, + "last_refreshed_at should be anchored to the original load time, not bumped on rename" + ); +} From 50cdb25be7c4a57fe20490eeb1c83c21295d7146 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Tue, 9 Jun 2026 19:39:59 -0700 Subject: [PATCH 2/5] fix(mcp): add data_url field to _table_catalog for mechanical refresh (#60) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new `data_url` TEXT column to `_table_catalog`, exposed through `set_table_metadata`. This is the machine-actionable download URL for the raw data file — distinct from `source_url` (human-readable page) and `source_description` (prose). With `data_url` and the existing `load_params`, an automated refresh becomes fully mechanical: fetch `data_url`, infer format from the extension, re-ingest with the recorded parameters. Changes: - `CATALOG_COLUMNS`: add `data_url TEXT` to the schema. - `ensure_exists_in` migration: `ALTER TABLE ADD COLUMN data_url TEXT` so pre-existing catalogs gain the column without recreation. - `CatalogEntry`: add `data_url: Option` + `to_json()` output. - `MetadataFields`: add `data_url` + include in `is_empty()` check. - `set_metadata_in`: generate the `data_url = ...` assignment. - `upsert_stub_in` INSERT: include `data_url` in the column list (explicit NULL for consistency with the 14-column schema). - `SetTableMetadataParams` (server): add `data_url` param + thread it. - Tool description updated to mention `data_url`. - SELECTs in `list_in`/`get_in`: fetch the new column. - `row_to_entry`: parse `data_url` from the row JSON. - Module doc table: add `data_url` entry. - Test: `set_metadata_data_url_roundtrip` — set, read back, clear with empty string. Closes #60. --- hyperdb-mcp/src/server.rs | 8 +++- hyperdb-mcp/src/table_catalog.rs | 24 +++++++--- hyperdb-mcp/tests/per_tool_database_tests.rs | 3 +- hyperdb-mcp/tests/table_catalog_tests.rs | 48 ++++++++++++++++++++ 4 files changed, 73 insertions(+), 10 deletions(-) diff --git a/hyperdb-mcp/src/server.rs b/hyperdb-mcp/src/server.rs index 77ee372..c2478cb 100644 --- a/hyperdb-mcp/src/server.rs +++ b/hyperdb-mcp/src/server.rs @@ -765,6 +765,11 @@ pub struct SetTableMetadataParams { pub license: Option, /// Free-form notes: refresh instructions, known gotchas, caveats. pub notes: Option, + /// Machine-actionable download URL for the raw data file. Distinct + /// from `source_url` (which is a human-readable page/reference). + /// Enables mechanical refresh: the server can re-ingest the table + /// from this URL + `load_params` without prose parsing. + pub data_url: Option, /// Target database alias for the catalog write. Omit (or pass /// `"local"` / `"persistent"`) to update the persistent catalog — /// matches the default for the ephemeral primary's tables. @@ -2903,7 +2908,7 @@ impl HyperMcpServer { /// Update prose metadata for a table in the `_table_catalog`. #[tool( - description = "Update prose metadata for a table in the `_table_catalog`: source_url, source_description, purpose, license, notes. Fields you omit stay unchanged; pass an explicit empty string (\"\") to clear a field. Mechanical fields (load_tool, load_params, loaded_at, last_refreshed_at, row_count) are managed by the server. Requires an existing catalog entry — load the table first (load_file / load_data / execute CREATE TABLE) so the stub row is created automatically. Use `database` to target the metadata for a table in a non-primary writable database; read-only attachments are rejected with a clear re-attach-with-writable message. Disabled in read-only mode." + description = "Update prose metadata for a table in the `_table_catalog`: source_url, source_description, purpose, license, notes, data_url. Fields you omit stay unchanged; pass an explicit empty string (\"\") to clear a field. `data_url` is the machine-actionable download URL for the raw data file (distinct from `source_url`, which is a human-readable reference page). Mechanical fields (load_tool, load_params, loaded_at, last_refreshed_at, row_count) are managed by the server. Requires an existing catalog entry — load the table first (load_file / load_data / execute CREATE TABLE) so the stub row is created automatically. Use `database` to target the metadata for a table in a non-primary writable database; read-only attachments are rejected with a clear re-attach-with-writable message. Disabled in read-only mode." )] fn set_table_metadata( &self, @@ -2918,6 +2923,7 @@ impl HyperMcpServer { purpose: params.purpose, license: params.license, notes: params.notes, + data_url: params.data_url, }; let table_name = params.table.clone(); let result = self.with_engine(|engine| { diff --git a/hyperdb-mcp/src/table_catalog.rs b/hyperdb-mcp/src/table_catalog.rs index e69c87d..3dde314 100644 --- a/hyperdb-mcp/src/table_catalog.rs +++ b/hyperdb-mcp/src/table_catalog.rs @@ -18,6 +18,7 @@ //! | `purpose` | User / LLM | //! | `license` | User / LLM | //! | `notes` | User / LLM | +//! | `data_url` | User / LLM via `set_table_metadata` | //! //! The backing table is `_table_catalog`. Each writable database that //! receives an ingest gets its own catalog, lazily seeded on first @@ -137,6 +138,7 @@ pub struct CatalogEntry { pub notes: Option, pub created_by: Option, pub last_modified_by: Option, + pub data_url: Option, } impl CatalogEntry { @@ -159,6 +161,7 @@ impl CatalogEntry { "notes": self.notes, "created_by": self.created_by, "last_modified_by": self.last_modified_by, + "data_url": self.data_url, }) } } @@ -172,6 +175,7 @@ pub struct MetadataFields { pub purpose: Option, pub license: Option, pub notes: Option, + pub data_url: Option, } impl MetadataFields { @@ -185,6 +189,7 @@ impl MetadataFields { && self.purpose.is_none() && self.license.is_none() && self.notes.is_none() + && self.data_url.is_none() } } @@ -206,7 +211,8 @@ const CATALOG_COLUMNS: &str = "(\ row_count BIGINT, \ notes TEXT, \ created_by TEXT, \ - last_modified_by TEXT\ + last_modified_by TEXT, \ + data_url TEXT\ )"; /// Idempotently create the backing table in `target_db`. Safe to call @@ -236,7 +242,7 @@ pub fn ensure_exists_in(engine: &Engine, target_db: Option<&str>) -> Result<(), // Migrate pre-existing catalogs: add columns introduced after the // initial schema. Each ALTER is idempotent — if the column already // exists Hyper returns an error that we swallow. - for col in ["created_by TEXT", "last_modified_by TEXT"] { + for col in ["created_by TEXT", "last_modified_by TEXT", "data_url TEXT"] { let alter = format!("ALTER TABLE {qualified} ADD COLUMN {col}"); let _ = engine.execute_command(&alter); } @@ -296,7 +302,7 @@ pub fn list_in(engine: &Engine, target_db: Option<&str>) -> Result Result { last_refreshed_at, row_count, notes: str_field("notes"), + data_url: str_field("data_url"), created_by: str_field("created_by"), last_modified_by: str_field("last_modified_by"), }) diff --git a/hyperdb-mcp/tests/per_tool_database_tests.rs b/hyperdb-mcp/tests/per_tool_database_tests.rs index 21f986e..7a148e1 100644 --- a/hyperdb-mcp/tests/per_tool_database_tests.rs +++ b/hyperdb-mcp/tests/per_tool_database_tests.rs @@ -784,8 +784,7 @@ fn set_metadata_in_routes_to_user_attached_db() { source_url: Some("s3://bucket/events.parquet".into()), source_description: Some("Tracking events".into()), purpose: Some("daily reports".into()), - license: None, - notes: None, + ..Default::default() }; let entry = hyperdb_mcp::table_catalog::set_metadata_in(&engine, "events", &fields, Some("user_db")) diff --git a/hyperdb-mcp/tests/table_catalog_tests.rs b/hyperdb-mcp/tests/table_catalog_tests.rs index 75bd968..32ce283 100644 --- a/hyperdb-mcp/tests/table_catalog_tests.rs +++ b/hyperdb-mcp/tests/table_catalog_tests.rs @@ -548,3 +548,51 @@ fn rename_table_preserves_catalog_metadata() { "last_refreshed_at should be anchored to the original load time, not bumped on rename" ); } + +#[test] +fn set_metadata_data_url_roundtrip() { + let (engine, _dir) = workspace_engine(); + engine + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") + .unwrap(); + table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(10), true).unwrap(); + + let entry = table_catalog::set_metadata( + &engine, + "widgets", + &MetadataFields { + data_url: Some("https://example.com/widgets.csv?v=2".into()), + ..Default::default() + }, + ) + .unwrap(); + + assert_eq!( + entry.data_url.as_deref(), + Some("https://example.com/widgets.csv?v=2"), + "data_url should round-trip through set_metadata" + ); + + // Read it back fresh to confirm persistence. + let fresh = table_catalog::get(&engine, "widgets").unwrap().unwrap(); + assert_eq!( + fresh.data_url.as_deref(), + Some("https://example.com/widgets.csv?v=2") + ); + + // Clear it with an empty string. + table_catalog::set_metadata( + &engine, + "widgets", + &MetadataFields { + data_url: Some(String::new()), + ..Default::default() + }, + ) + .unwrap(); + let cleared = table_catalog::get(&engine, "widgets").unwrap().unwrap(); + assert!( + cleared.data_url.is_none(), + "empty string should clear data_url" + ); +} From 2e4a4d7ddf3d037a9e8068a1c97ecc7206aea3ed Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Tue, 9 Jun 2026 20:53:51 -0700 Subject: [PATCH 3/5] fix(api): ToSqlParam for Numeric (scale=0), Interval, and JSON (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds binary-bind ToSqlParam impls so these types can be passed to query_params without manual stringification: - Interval: PG binary [us:i64 BE][days:i32 BE][months:i32 BE]. - JSON (serde_json::Value): PG `json` binary form is the UTF-8 text (no jsonb version byte); serde_json is already an unconditional dep, so no feature flag is needed. - Numeric: PG binary NUMERIC, scale=0 only. Hyper rejects scaled binary NUMERIC params server-side with SQLSTATE 0A000 regardless of encoding or explicit CAST (verified empirically), so scaled support is not possible over the current binary bind path — tracked in #132. For scale>0, encode_param sets dscale>0 so the param fails fast at the server rather than silently binding a mis-scaled whole number; the byte payload is deliberately server-rejected, NOT a faithful scaled encoding (correct decimal-aligned base-10000 grouping is #132). Unit tests cover the scale=0 byte layout (42, 0, -1, 123456789), the dscale-for-rejection behavior, Interval bytes, and JSON bytes. Integration tests round-trip JSON, Interval, scale=0 Numeric, and pin the scale>0 fail-fast rejection (flips to success when #132 lands). Partially addresses #65 (Geography in the next commit). --- hyperdb-api/src/params.rs | 214 ++++++++++++++++++++++++- hyperdb-api/tests/param_types_tests.rs | 113 +++++++++++++ 2 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 hyperdb-api/tests/param_types_tests.rs diff --git a/hyperdb-api/src/params.rs b/hyperdb-api/src/params.rs index e6383f5..fb4f5e0 100644 --- a/hyperdb-api/src/params.rs +++ b/hyperdb-api/src/params.rs @@ -48,7 +48,9 @@ //! } //! ``` -use hyperdb_api_core::types::{oids, Date, OffsetTimestamp, Oid, Time, Timestamp}; +use hyperdb_api_core::types::{ + oids, Date, Interval, Numeric, OffsetTimestamp, Oid, Time, Timestamp, +}; /// Trait for types that can be used as parameters in parameterized SQL queries. /// @@ -432,6 +434,140 @@ impl ToSqlParam for Vec { } } +// ============================================================================= +// Numeric implementation +// ============================================================================= + +/// Encode a whole-number (`scale == 0`) `Numeric` as PostgreSQL binary NUMERIC. +/// +/// Header (i16 BE): `ndigits`, `weight`, `sign` (0x0000 pos / 0x4000 neg), +/// `dscale = 0`; then `ndigits` base-10000 groups (i16 BE, most-significant +/// first). The `weight` of the most-significant group is `ndigits - 1` (it +/// sits at base-10000 position `ndigits-1`), and `dscale` is 0 because there +/// are no fractional digits. +/// +/// This handles ONLY `scale == 0`. Correctly encoding a scaled NUMERIC +/// requires decomposing the *decimal* representation into base-10000 groups +/// aligned on the decimal point (not decomposing the unscaled integer) — that +/// is out of scope here because Hyper rejects scaled binary NUMERIC params +/// regardless (see [`ToSqlParam for Numeric`] and #132). The caller is +/// responsible for only invoking this with `scale == 0`. +#[expect( + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + reason = "an i128 spans at most ~39 decimal digits → ≤10 base-10000 groups; \ + ndigits and weight always fit in i16" +)] +fn pg_numeric_encode_unscaled(unscaled: i128) -> Vec { + let sign_neg = unscaled < 0; + let mut mag = unscaled.unsigned_abs(); + + // Decompose the integer magnitude into base-10000 groups, least-significant + // first, then reverse to most-significant first. + let mut groups: Vec = Vec::new(); + while mag > 0 { + groups.push((mag % 10000) as i16); + mag /= 10000; + } + groups.reverse(); // empty when unscaled == 0 + + let ndigits = groups.len() as i16; + let weight = if groups.is_empty() { 0 } else { ndigits - 1 }; + + let mut buf = Vec::with_capacity(8 + groups.len() * 2); + buf.extend_from_slice(&ndigits.to_be_bytes()); + buf.extend_from_slice(&weight.to_be_bytes()); + buf.extend_from_slice(&(if sign_neg { 0x4000_i16 } else { 0 }).to_be_bytes()); + buf.extend_from_slice(&0_i16.to_be_bytes()); // dscale = 0 (whole number) + for g in groups { + buf.extend_from_slice(&g.to_be_bytes()); + } + buf +} + +impl ToSqlParam for Numeric { + /// Binds as PostgreSQL binary NUMERIC. **Only `scale() == 0` (whole + /// numbers) is supported.** + /// + /// Hyper rejects scaled binary NUMERIC params at query time with SQLSTATE + /// `0A000` ("cannot handle truncation when reading numerics") — verified + /// empirically, and regardless of an explicit `CAST`. So a faithful scaled + /// encoder would never succeed anyway; full scaled support is tracked in + /// #132. + /// + /// For `scale() > 0` this returns a header whose `dscale` is set to the + /// true scale. The byte payload is therefore NOT a correct PostgreSQL + /// NUMERIC for the value (correct scaled encoding requires decimal-aligned + /// base-10000 grouping, deferred to #132) — but because `dscale > 0`, Hyper + /// rejects it server-side before it can be misinterpreted. The net effect + /// is fail-fast: a scaled param errors clearly instead of silently binding + /// a wrong whole number. + fn encode_param(&self) -> Option> { + if self.scale() == 0 { + return Some(pg_numeric_encode_unscaled(self.unscaled_value())); + } + // scale > 0: emit the unscaled digits but with dscale = scale so the + // server rejects it (0A000) rather than reading a mis-scaled integer. + // These bytes are intentionally server-rejected, not a valid value; + // see the doc comment and #132. + let mut buf = pg_numeric_encode_unscaled(self.unscaled_value()); + // Overwrite the dscale field (bytes 6..8) with the true scale. + let dscale = i16::from(self.scale()).to_be_bytes(); + buf[6] = dscale[0]; + buf[7] = dscale[1]; + Some(buf) + } + fn sql_oid(&self) -> Oid { + oids::NUMERIC + } + fn to_sql_literal(&self) -> String { + self.to_string() + } // Display = decimal string +} + +// ============================================================================= +// Interval implementation +// ============================================================================= + +impl ToSqlParam for Interval { + fn encode_param(&self) -> Option> { + // PG interval binary (Bind format code 1): i64 microseconds, i32 days, + // i32 months — all BIG-endian. NB this differs from Hyper's HyperBinary + // `Interval::encode()` which is the same field order but LITTLE-endian. + let mut buf = Vec::with_capacity(16); + buf.extend_from_slice(&self.microseconds().to_be_bytes()); + buf.extend_from_slice(&self.days().to_be_bytes()); + buf.extend_from_slice(&self.months().to_be_bytes()); + Some(buf) + } + fn sql_oid(&self) -> Oid { + oids::INTERVAL + } + fn to_sql_literal(&self) -> String { + format!("INTERVAL '{self}'") + } +} + +// ============================================================================= +// JSON implementation +// ============================================================================= + +impl ToSqlParam for serde_json::Value { + fn encode_param(&self) -> Option> { + // PG `json` binary form == the UTF-8 text. (jsonb has a leading + // version byte; `json` does not, and oids::JSON is `json`.) + // Value::to_string() is compact (no whitespace, no trailing newline) + // and correctly escapes embedded quotes — exactly the wire form needed. + Some(self.to_string().into_bytes()) + } + fn sql_oid(&self) -> Oid { + oids::JSON + } + fn to_sql_literal(&self) -> String { + format!("'{}'", self.to_string().replace('\'', "''")) + } +} + #[cfg(test)] mod tests { use super::*; @@ -476,4 +612,80 @@ mod tests { assert_eq!(value.encode_param(), Some(vec![0, 0, 0, 42])); assert_eq!((&&value).encode_param(), Some(vec![0, 0, 0, 42])); } + + #[test] + fn test_pg_numeric_encode_unscaled() { + // 42 → ndigits=1, weight=0, sign=0, dscale=0, group=42 + assert_eq!( + pg_numeric_encode_unscaled(42), + vec![0, 1, 0, 0, 0, 0, 0, 0, 0, 42] + ); + + // 0 → ndigits=0, weight=0, sign=0, dscale=0 (empty digit list) + assert_eq!(pg_numeric_encode_unscaled(0), vec![0, 0, 0, 0, 0, 0, 0, 0]); + + // -1 → ndigits=1, weight=0, sign=0x4000, dscale=0, group=1 + assert_eq!( + pg_numeric_encode_unscaled(-1), + vec![0, 1, 0, 0, 0x40, 0, 0, 0, 0, 1] + ); + + // 123456789 = 1*10000^2 + 2345*10000 + 6789 + // → ndigits=3, weight=2, sign=0, dscale=0, groups=[1, 2345, 6789] + assert_eq!( + pg_numeric_encode_unscaled(123_456_789), + vec![ + 0, 3, // ndigits=3 + 0, 2, // weight=2 + 0, 0, // sign=0 + 0, 0, // dscale=0 + 0, 1, // group 1 + 9, 41, // group 2345 (0x0929) + 26, 133 // group 6789 (0x1A85) + ] + ); + } + + #[test] + fn test_numeric_scale0_encode_param() { + // The scale=0 ToSqlParam path produces the canonical whole-number form. + assert_eq!( + Numeric::new(42, 0).encode_param(), + Some(vec![0, 1, 0, 0, 0, 0, 0, 0, 0, 42]) + ); + } + + #[test] + fn test_numeric_scaled_sets_dscale_for_rejection() { + // For scale>0, encode_param sets dscale = true scale so the server + // REJECTS the param (0A000). These bytes are intentionally NOT a valid + // representation of 1.23 — correct scaled encoding is #132. We only + // assert the dscale field (bytes 6..8) carries the scale, which is what + // triggers Hyper's fail-fast rejection. + let bytes = Numeric::new(123, 2).encode_param().expect("some"); + assert_eq!(&bytes[6..8], &[0, 2], "dscale must equal the true scale"); + assert_ne!(&bytes[6..8], &[0, 0], "must not look like a whole number"); + } + + #[test] + fn test_interval_encoding() { + // Interval::new(months, days, microseconds) + let interval = Interval::new(2, 5, 0); + // PG binary: [us:i64 BE][days:i32 BE][months:i32 BE] + assert_eq!( + interval.encode_param(), + Some(vec![ + 0, 0, 0, 0, 0, 0, 0, 0, // us = 0 + 0, 0, 0, 5, // days = 5 + 0, 0, 0, 2 // months = 2 + ]) + ); + } + + #[test] + fn test_json_encoding() { + let json = serde_json::json!({"a": 1}); + // UTF-8 bytes of compact JSON string + assert_eq!(json.encode_param(), Some(br#"{"a":1}"#.to_vec())); + } } diff --git a/hyperdb-api/tests/param_types_tests.rs b/hyperdb-api/tests/param_types_tests.rs new file mode 100644 index 0000000..6b9d85b --- /dev/null +++ b/hyperdb-api/tests/param_types_tests.rs @@ -0,0 +1,113 @@ +// Copyright (c) 2026, Salesforce, Inc. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Integration tests for ToSqlParam implementations. +//! +//! These tests verify that types implementing ToSqlParam correctly encode +//! parameters for use with query_params(), validating against actual Hyper behavior. + +use hyperdb_api::{Interval, Numeric, ToSqlParam}; + +mod common; +use common::TestConnection; + +/// Test JSON parameter round-trip. +#[test] +fn test_json_param() { + let test = TestConnection::new().expect("Failed to create test connection"); + + let json_value = serde_json::json!({"a": 1, "b": [2, 3]}); + let result = test + .connection + .query_params("SELECT $1 AS v", &[&json_value as &dyn ToSqlParam]) + .expect("query_params failed"); + + let rows = result.collect_rows().expect("collect_rows failed"); + assert_eq!(rows.len(), 1, "Expected exactly one row"); + + // Read back as String and verify it parses to the same JSON value + let returned_str: Option = rows[0].get(0); + assert!(returned_str.is_some(), "Expected non-NULL JSON value"); + + let returned_json: serde_json::Value = + serde_json::from_str(&returned_str.unwrap()).expect("Failed to parse returned JSON"); + assert_eq!( + returned_json, json_value, + "Returned JSON doesn't match original" + ); +} + +/// Test Interval parameter. +#[test] +fn test_interval_param() { + let test = TestConnection::new().expect("Failed to create test connection"); + + let interval = Interval::new(2, 5, 0); // 2 months, 5 days, 0 microseconds + let result = test + .connection + .query_params("SELECT $1 AS v", &[&interval as &dyn ToSqlParam]) + .expect("query_params failed"); + + let rows = result.collect_rows().expect("collect_rows failed"); + assert_eq!(rows.len(), 1, "Expected exactly one row"); + + // Verify the query succeeded and returned a non-null value + // (Full Interval RowValue support is out of scope for this commit) + let returned: Option = rows[0].get(0); + assert!(returned.is_some(), "Expected non-NULL Interval value"); +} + +/// Test Numeric scale=0 parameter round-trip. +#[test] +fn test_numeric_scale0_param() { + let test = TestConnection::new().expect("Failed to create test connection"); + + let numeric = Numeric::new(42, 0); // scale = 0 + let result = test + .connection + .query_params("SELECT $1 AS v", &[&numeric as &dyn ToSqlParam]) + .expect("query_params failed"); + + let rows = result.collect_rows().expect("collect_rows failed"); + assert_eq!(rows.len(), 1, "Expected exactly one row"); + + // Read back as i64 and verify + let returned: Option = rows[0].get(0); + assert_eq!( + returned, + Some(42), + "Expected Numeric(42,0) to round-trip as 42" + ); +} + +/// Pins the documented scale>0 limitation: Hyper rejects binary NUMERIC +/// params that carry a non-zero dscale. +/// +/// We encode the TRUE scale (dscale = 2 for `Numeric::new(123, 2)` == 1.23), +/// so the value is represented faithfully on the wire — and Hyper then +/// rejects it server-side with SQLSTATE `0A000` ("cannot handle truncation +/// when reading numerics") rather than silently truncating it to a +/// mis-scaled integer. This is fail-fast, not silent corruption. +/// +/// The error surfaces at `collect_rows()` time (when the Bind/Execute round +/// trip completes), NOT at `query_params()` time. When scaled support lands +/// (#132), this test flips to a success assertion. +#[test] +fn test_numeric_scaled_rejected_fail_fast() { + let test = TestConnection::new().expect("Failed to create test connection"); + + let numeric = Numeric::new(123, 2); // 1.23 — scale > 0 + let result = test + .connection + .query_params("SELECT $1 AS v", &[&numeric as &dyn ToSqlParam]) + .expect("query_params itself should not error"); + + let err = result + .collect_rows() + .expect_err("scale>0 Numeric must be rejected by the server, not silently truncated"); + let msg = err.to_string(); + assert!( + msg.contains("0A000") || msg.contains("cannot handle truncation when reading numerics"), + "expected Hyper's NUMERIC truncation error (fail-fast), got: {msg}" + ); +} From c26242e448055816ef5e01ef5e5a7509e06f2a77 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Tue, 9 Jun 2026 21:11:02 -0700 Subject: [PATCH 4/5] fix(api): IntoValue + Inserter::add_geography for Geography (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds Geography support to the Inserter (COPY / HyperBinary path), which the issue's gap analysis flagged as missing. Geography can now be passed to Inserter::add_row via the IntoValue trait. - Inserter::add_geography(&Geography) delegates to the existing varbinary path (add_bytes), passing the raw as_bytes() payload. The chunk-level write_varbinary already prepends the [u32_le len] framing, so we must NOT pass to_hyper_binary() output (which would double-prefix the length and corrupt the value). - impl IntoValue for Geography and &Geography (Geography owns a Vec and isn't Copy, so these take/pass by reference — mirrors the &Interval / &Numeric reference impls). - Round-trip test (WKT POINT in → byte-identical raw bytes out) and a nullable test (Some + None via the Option blanket impl → SQL NULL). NOTE: ToSqlParam for Geography (query-parameter binding) is NOT included — Hyper has no PostgreSQL-binary input function for the geography type (error 42883 on bind), so it is impossible over the current binary-only bind path. Tracked in #133 (needs per-parameter text-format bind, same root cause as #132). Geography also does not yet implement RowValue, so results are read as raw bytes; that reader is future work. Closes #65. --- hyperdb-api/src/inserter.rs | 38 ++++++++++- hyperdb-api/tests/inserter_tests.rs | 99 ++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 3 deletions(-) diff --git a/hyperdb-api/src/inserter.rs b/hyperdb-api/src/inserter.rs index db06bf8..a620613 100644 --- a/hyperdb-api/src/inserter.rs +++ b/hyperdb-api/src/inserter.rs @@ -25,7 +25,9 @@ use std::time::Instant; use hyperdb_api_core::client::client::CopyInWriter; use hyperdb_api_core::protocol::copy; use hyperdb_api_core::types::bytes::BytesMut; -use hyperdb_api_core::types::{Date, Interval, Numeric, OffsetTimestamp, Time, Timestamp}; +use hyperdb_api_core::types::{ + Date, Geography, Interval, Numeric, OffsetTimestamp, Time, Timestamp, +}; use tracing::{debug, info}; use crate::catalog::Catalog; @@ -609,6 +611,16 @@ impl<'conn> Inserter<'conn> { self.chunk.add_interval(value) } + /// Adds a Geography value. + /// + /// # Errors + /// + /// See [`add_bool`](Self::add_bool). + #[inline] + pub fn add_geography(&mut self, value: &Geography) -> Result<()> { + self.chunk.add_geography(value) + } + /// Adds a Numeric value. /// /// For NUMERIC(precision, scale) where precision ≤ [`Numeric::SMALL_NUMERIC_MAX_PRECISION`] @@ -867,7 +879,7 @@ impl ColumnMapping { /// - `&str`, `String` /// - `Option` where `T: IntoValue` (for nullable columns) /// - Date/time types: `Date`, `Time`, `Timestamp`, `Interval` -/// - `Numeric`, `Geography`, `Oid`, `Vec` (bytes) +/// - `Numeric`, `Geography`, `Vec` (bytes) /// /// # Example /// @@ -1004,6 +1016,12 @@ impl IntoValue for Numeric { } } +impl IntoValue for Geography { + fn add_to_inserter(&self, inserter: &mut Inserter<'_>) -> Result<()> { + inserter.add_geography(self) + } +} + // Option for nullable values impl IntoValue for Option { fn add_to_inserter(&self, inserter: &mut Inserter<'_>) -> Result<()> { @@ -1112,6 +1130,12 @@ impl IntoValue for &Numeric { } } +impl IntoValue for &Geography { + fn add_to_inserter(&self, inserter: &mut Inserter<'_>) -> Result<()> { + inserter.add_geography(self) + } +} + // ============================================================================= // MappedInserter // ============================================================================= @@ -1797,6 +1821,16 @@ impl InsertChunk { Ok(()) } + /// Adds a Geography value. + /// + /// # Errors + /// + /// See [`add_bool`](Self::add_bool). + pub fn add_geography(&mut self, value: &Geography) -> Result<()> { + // Geography uses the same varbinary path as add_bytes + self.add_bytes(value.as_bytes()) + } + /// Ends the current row. /// /// Returns an error if the wrong number of columns were added. diff --git a/hyperdb-api/tests/inserter_tests.rs b/hyperdb-api/tests/inserter_tests.rs index d92062c..05db97a 100644 --- a/hyperdb-api/tests/inserter_tests.rs +++ b/hyperdb-api/tests/inserter_tests.rs @@ -3,7 +3,7 @@ //! Tests for the Inserter API. -use hyperdb_api::{Catalog, Date, Inserter, SqlType, TableDefinition}; +use hyperdb_api::{Catalog, Date, Geography, Inserter, SqlType, TableDefinition}; mod common; use common::TestConnection; @@ -277,3 +277,100 @@ fn test_inserter_from_table() { let count = test.count_tuples("products").expect("Failed to count"); assert_eq!(count, 2); } + +#[test] +fn test_inserter_geography_round_trip() { + let test = TestConnection::new().expect("Failed to create test connection"); + + let table_def = TableDefinition::new("geo_test") + .add_required_column("id", SqlType::int()) + .add_nullable_column("location", SqlType::geography()); + + Catalog::new(&test.connection) + .create_table(&table_def) + .expect("Failed to create table"); + + let mut inserter = + Inserter::new(&test.connection, &table_def).expect("Failed to create inserter"); + + // Create a Geography from WKT (San Francisco coordinates) + let geo = Geography::from_wkt("POINT(-122.4194 37.7749)") + .expect("Failed to create geography from WKT"); + let original_bytes = geo.as_bytes().to_vec(); + + // Insert using IntoValue trait + inserter.add_row(&[&1i32, &geo]).expect("Failed to add row"); + + inserter.execute().expect("Failed to execute inserter"); + + // Read back and verify byte-identical + let mut result = test + .connection + .execute_query("SELECT id, location FROM geo_test") + .expect("Failed to query"); + + let chunk = result + .next_chunk() + .expect("Failed to get chunk") + .expect("Expected chunk"); + let row = chunk.first().expect("Expected row"); + + let id = row.get_i32(0).expect("NULL id"); + assert_eq!(id, 1); + + // Geography does NOT implement RowValue yet, so read as Vec + let geo_bytes = row.get::>(1).expect("NULL geography"); + assert_eq!( + geo_bytes, original_bytes, + "Geography bytes should be byte-identical after round-trip" + ); +} + +#[test] +fn test_inserter_geography_nullable() { + let test = TestConnection::new().expect("Failed to create test connection"); + + let table_def = TableDefinition::new("geo_nullable_test") + .add_required_column("id", SqlType::int()) + .add_nullable_column("location", SqlType::geography()); + + Catalog::new(&test.connection) + .create_table(&table_def) + .expect("Failed to create table"); + + let mut inserter = + Inserter::new(&test.connection, &table_def).expect("Failed to create inserter"); + + // Insert a row with a Geography value + let geo1 = Geography::from_wkt("POINT(-122.4194 37.7749)") + .expect("Failed to create geography from WKT"); + inserter + .add_row(&[&1i32, &Some(geo1)]) + .expect("Failed to add row with geography"); + + // Insert a row with NULL + inserter + .add_row(&[&2i32, &None::]) + .expect("Failed to add row with NULL"); + + inserter.execute().expect("Failed to execute inserter"); + + // Verify rows + let mut result = test + .connection + .execute_query("SELECT id, location FROM geo_nullable_test ORDER BY id") + .expect("Failed to query table"); + + let mut rows: Vec<(i32, Option>)> = Vec::new(); + while let Some(chunk) = result.next_chunk().expect("Failed to get chunk") { + for row in &chunk { + let id = row.get_i32(0).expect("NULL id"); + let location = row.get::>(1); + rows.push((id, location)); + } + } + + assert_eq!(rows.len(), 2); + assert!(rows[0].1.is_some(), "First row should have a geography"); + assert!(rows[1].1.is_none(), "Second row should have NULL"); +} From ae618fb0f6312762b9fc4b97abea7fc7a7c44fba Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Tue, 9 Jun 2026 21:18:30 -0700 Subject: [PATCH 5/5] test(api): strengthen ToSqlParam tests + complete the supported-types doc (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final-review polish for the #65 ToSqlParam work: - params.rs module doc: list the newly-supported types (Numeric scale=0, Interval, serde_json::Value, bytes) and note Geography is Inserter-only (#133), so the 'Supported Types' list matches the actual impls. - test_interval_param: assert the param renders to "P2M5D" (ISO-8601), proving the [us BE][days BE][months BE] field encoding decoded correctly — not just that a non-null value came back. - Add test_option_numeric_param: exercises the Option blanket impl (Some(Numeric scale=0) → value, None → SQL NULL). --- hyperdb-api/src/params.rs | 13 ++++++- hyperdb-api/tests/param_types_tests.rs | 49 ++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/hyperdb-api/src/params.rs b/hyperdb-api/src/params.rs index fb4f5e0..3f154d5 100644 --- a/hyperdb-api/src/params.rs +++ b/hyperdb-api/src/params.rs @@ -30,8 +30,19 @@ //! - Floats: `f32`, `f64` //! - `bool` //! - `&str`, `String` -//! - `Option` where `T: ToSqlParam` (for nullable parameters) +//! - Bytes: `&[u8]`, `Vec` //! - Date/time types: `Date`, `Time`, `Timestamp`, `OffsetTimestamp` +//! - `Interval` +//! - `Numeric` — **whole numbers only (`scale == 0`)**; Hyper rejects +//! scaled binary NUMERIC params (see the `Numeric` impl and issue #132) +//! - `serde_json::Value` (binds as PostgreSQL `json`) +//! - `Option` where `T: ToSqlParam` (for nullable parameters) +//! - `&T` where `T: ToSqlParam` +//! +//! Note: `Geography` does **not** implement `ToSqlParam` — Hyper has no +//! PostgreSQL-binary input function for the geography type (issue #133). +//! Use the [`Inserter`](crate::Inserter) (`IntoValue`) path to write +//! geography values instead. //! //! # Example //! diff --git a/hyperdb-api/tests/param_types_tests.rs b/hyperdb-api/tests/param_types_tests.rs index 6b9d85b..e755956 100644 --- a/hyperdb-api/tests/param_types_tests.rs +++ b/hyperdb-api/tests/param_types_tests.rs @@ -37,24 +37,61 @@ fn test_json_param() { ); } -/// Test Interval parameter. +/// Test Interval parameter — verifies the binary encoding decodes to the +/// correct value server-side by rendering the bound param as text. #[test] fn test_interval_param() { let test = TestConnection::new().expect("Failed to create test connection"); let interval = Interval::new(2, 5, 0); // 2 months, 5 days, 0 microseconds + // CAST the bound interval param to text so we can assert the VALUE, not + // just non-null — this proves the [us BE][days BE][months BE] encoding + // was interpreted correctly (Interval doesn't yet implement RowValue, so + // we can't read it back as a typed Interval). let result = test .connection - .query_params("SELECT $1 AS v", &[&interval as &dyn ToSqlParam]) + .query_params( + "SELECT CAST($1 AS text) AS v", + &[&interval as &dyn ToSqlParam], + ) .expect("query_params failed"); let rows = result.collect_rows().expect("collect_rows failed"); assert_eq!(rows.len(), 1, "Expected exactly one row"); - // Verify the query succeeded and returned a non-null value - // (Full Interval RowValue support is out of scope for this commit) - let returned: Option = rows[0].get(0); - assert!(returned.is_some(), "Expected non-NULL Interval value"); + let returned: String = rows[0].get(0).expect("Expected non-NULL Interval value"); + // Hyper renders intervals in ISO-8601 duration form: "P2M5D" (Period, + // 2 Months, 5 Days). Asserting the exact rendering proves the + // [us BE][days BE][months BE] field encoding decoded correctly — a + // swapped or mis-scaled field would produce a different string. + assert_eq!( + returned, "P2M5D", + "interval should decode to 2 months + 5 days (ISO-8601), got: {returned}" + ); +} + +/// Test Option (nullable param via the blanket Option impl). +#[test] +fn test_option_numeric_param() { + let test = TestConnection::new().expect("Failed to create test connection"); + + // Some(scale=0) binds the value; None binds SQL NULL. + let some_n: Option = Some(Numeric::new(7, 0)); + let none_n: Option = None; + + let rows = test + .connection + .query_params( + "SELECT $1 AS a, $2 AS b", + &[&some_n as &dyn ToSqlParam, &none_n as &dyn ToSqlParam], + ) + .expect("query_params failed") + .collect_rows() + .expect("collect_rows failed"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].get::(0), Some(7), "Some(Numeric(7,0)) → 7"); + assert_eq!(rows[0].get::(1), None, "None → SQL NULL"); } /// Test Numeric scale=0 parameter round-trip.