From 84906995d7a351c69b02540022fd261398e3bade Mon Sep 17 00:00:00 2001 From: Admnwk <220553748+Admnwk@users.noreply.github.com> Date: Fri, 26 Jun 2026 13:06:37 -0300 Subject: [PATCH 1/3] fix(aggregate): validate spec field names before building the scan/SQL Tier-3 aggregation (AdsAggregate, #113) resolved each spec's field with field_index()/name concatenation and never checked the result: - Native wire path (session.cpp): an unknown field (field_index -> -1) was folded as COUNT(*), so `SUM:NOPE` silently returned the row count with AE_SUCCESS; an empty field on a non-COUNT function did the same. - SQLite/PostgreSQL push-down: the field name was concatenated straight into TOTAL("") / SUM(). A double-quoted token that is not a real column is silently treated as a string literal by SQLite (TOTAL("NOPE") -> 0), and a quote in the name breaks out of the identifier -> SQL injection surface. Validate every spec up front against the table schema: reject an unknown field, reject an empty field for any function other than COUNT, and emit SQL only with the canonical, quoted column name. Rejection returns a clear error instead of a wrong total. Adds regression tests on all three paths (native wire, SQLite, and PostgreSQL): unknown field, injection-shaped field, and empty non-COUNT field are each rejected. Full suite 872/872. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/abi/ace_exports.cpp | 20 ++++++++++- src/network/session.cpp | 35 ++++++++++++++----- src/sql_backend/postgres_connection.cpp | 26 +++++++++++--- tests/unit/abi_aggregate_postgres_test.cpp | 25 ++++++++++++++ tests/unit/abi_aggregate_sqlite_test.cpp | 40 ++++++++++++++++++++++ tests/unit/abi_aggregate_test.cpp | 39 +++++++++++++++++++++ 6 files changed, 171 insertions(+), 14 deletions(-) diff --git a/src/abi/ace_exports.cpp b/src/abi/ace_exports.cpp index ddbe9249..048dbaf0 100644 --- a/src/abi/ace_exports.cpp +++ b/src/abi/ace_exports.cpp @@ -893,11 +893,29 @@ UNSIGNED32 sqlite_aggregate( return false; }; + // Resolve a spec's field to its canonical, schema-validated column name. + // An unknown name must be rejected, never concatenated into the SQL: a + // double-quoted token that is not a real column is silently treated as a + // string literal by SQLite (TOTAL("NOPE") -> 0), and a quote in the name + // would break out of the identifier. Empty field is valid only for COUNT. + auto resolve_col = [&](const openads::engine::AggSpec& s, + std::string& col) -> bool { + if (s.field.empty()) + return s.fn == openads::engine::AggFn::Count; + for (const auto& f : st->fields) { + if (iequal(f.name, s.field)) { col = "\"" + f.name + "\""; return true; } + } + return false; + }; + std::string sql = "SELECT "; for (std::size_t i = 0; i < specs->size(); ++i) { if (i) sql += ", "; const auto& s = (*specs)[i]; - const std::string col = "\"" + s.field + "\""; + std::string col; + if (!resolve_col(s, col)) + return fail(openads::AE_INTERNAL_ERROR, + ("sqlite_aggregate: invalid field " + s.field).c_str()); switch (s.fn) { case openads::engine::AggFn::Count: sql += s.field.empty() ? "COUNT(*)" : ("COUNT(" + col + ")"); diff --git a/src/network/session.cpp b/src/network/session.cpp index c13e6892..16a28579 100644 --- a/src/network/session.cpp +++ b/src/network/session.cpp @@ -1866,19 +1866,36 @@ DispatchResult Session::dispatch(const Frame& f) { std::vector fidx; accs.reserve(specs.size()); fidx.reserve(specs.size()); + // Validate every spec before touching the table: an unknown + // field name (field_index -> -1) must be rejected, never folded + // as COUNT(*) -- that would silently return the row count for a + // typo'd or injected field. Only COUNT may omit the field. + bool specs_ok = true; for (const auto& s : specs) { - std::int32_t fi = - s.field.empty() ? -1 : tbl->field_index(s.field); - bool numeric = false; - if (fi >= 0) { - const auto& fd = tbl->field_descriptor( - static_cast(fi)); - numeric = field_is_numeric(fd.type); + const auto fn = static_cast(s.fn); + if (s.field.empty()) { + if (fn != openads::engine::AggFn::Count) { + reply = err("Aggregate: empty field only valid for " + "COUNT"); + specs_ok = false; + break; + } + fidx.push_back(-1); + accs.emplace_back(fn, false); + continue; + } + std::int32_t fi = tbl->field_index(s.field); + if (fi < 0) { + reply = err("Aggregate: unknown field " + s.field); + specs_ok = false; + break; } - accs.emplace_back( - static_cast(s.fn), numeric); + const auto& fd = + tbl->field_descriptor(static_cast(fi)); + accs.emplace_back(fn, field_is_numeric(fd.type)); fidx.push_back(fi); } + if (!specs_ok) break; // Scan the whole table once, restoring the cursor afterwards. std::uint32_t saved = tbl->recno(); diff --git a/src/sql_backend/postgres_connection.cpp b/src/sql_backend/postgres_connection.cpp index 485ed40b..0223c5e4 100644 --- a/src/sql_backend/postgres_connection.cpp +++ b/src/sql_backend/postgres_connection.cpp @@ -428,14 +428,32 @@ PostgresConnection::aggregate(PostgresTable* tbl, return false; }; + // Resolve a spec's field to its canonical, schema-validated column name and + // quote it. An unknown name must be rejected, never concatenated raw: a + // crafted field could otherwise inject SQL. Empty field is only for COUNT. + auto resolve_col = [&](const engine::AggSpec& s, std::string& col) -> bool { + if (s.field.empty()) return s.fn == engine::AggFn::Count; + for (const auto& f : tbl->fields) { + if (f.name.size() != s.field.size()) continue; + bool same = true; + for (std::size_t j = 0; j < s.field.size(); ++j) + if (std::toupper(static_cast(f.name[j])) != + std::toupper(static_cast(s.field[j]))) { + same = false; break; + } + if (same) { col = quote_ident(f.name); return true; } + } + return false; + }; + std::string sql = "SELECT "; for (std::size_t i = 0; i < specs.size(); ++i) { if (i) sql += ", "; const auto& s = specs[i]; - // Leave the column unquoted so PostgreSQL case-folds it (the ABI hands - // field names in upper case, e.g. "QTY", but the column is "qty"); - // this matches how the translated WHERE refers to the same column. - const std::string col = s.field; + std::string col; + if (!resolve_col(s, col)) + return util::Error{5001, 0, + "invalid postgres aggregate field: " + s.field, ""}; switch (s.fn) { case engine::AggFn::Count: sql += s.field.empty() ? "COUNT(*)" : ("COUNT(" + col + ")"); diff --git a/tests/unit/abi_aggregate_postgres_test.cpp b/tests/unit/abi_aggregate_postgres_test.cpp index 5e18263c..0b7c3010 100644 --- a/tests/unit/abi_aggregate_postgres_test.cpp +++ b/tests/unit/abi_aggregate_postgres_test.cpp @@ -133,6 +133,31 @@ TEST_CASE("Tier-3 push-down: PostgreSQL untranslatable FOR declines") { CHECK(hRes == 0); } +// Regression for the Tier-3 review (#113): the field name must be validated +// against the cached schema before it is concatenated into the SQL, otherwise +// a typo'd or injection-shaped field reaches PostgreSQL raw. +TEST_CASE("Tier-3 push-down: PostgreSQL rejects an unknown aggregate field") { + PgFixture fx; + fx.open(); + + UNSIGNED8 forc[] = ""; + UNSIGNED8 spec[] = "SUM:NOPE"; + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + +TEST_CASE("Tier-3 push-down: PostgreSQL rejects empty field for non-COUNT") { + PgFixture fx; + fx.open(); + + UNSIGNED8 forc[] = ""; + UNSIGNED8 spec[] = "SUM:"; + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + #else TEST_CASE("Tier-3 aggregate push-down: postgresql backend disabled at compile time") { diff --git a/tests/unit/abi_aggregate_sqlite_test.cpp b/tests/unit/abi_aggregate_sqlite_test.cpp index 9160397c..01d8499e 100644 --- a/tests/unit/abi_aggregate_sqlite_test.cpp +++ b/tests/unit/abi_aggregate_sqlite_test.cpp @@ -121,6 +121,46 @@ TEST_CASE("Tier-3 push-down: SQLite zero matches -> 0 / 0 / empty") { REQUIRE(AdsAggregateClose(hRes) == AE_SUCCESS); } +// Regression for the Tier-3 review (#113): the SQLite push-down concatenated +// the spec's field name straight into `TOTAL("")` etc. A field that is +// not a real column (a typo, or an injection probe carrying a quote) must be +// rejected by validating against the cached schema BEFORE any SQL is built -- +// never reach the backend as a raw identifier. +TEST_CASE("Tier-3 push-down: SQLite rejects an unknown aggregate field") { + SqliteFixture fx; + fx.open(); + + UNSIGNED8 forc[] = ""; + UNSIGNED8 spec[] = "SUM:NOPE"; // NOPE is not a column of items + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + +TEST_CASE("Tier-3 push-down: SQLite rejects an injection-shaped field name") { + SqliteFixture fx; + fx.open(); + + UNSIGNED8 forc[] = ""; + // A quote in the field name would break out of the quoted identifier if it + // ever reached SQL. Validation must reject it up front. + UNSIGNED8 spec[] = "MIN:nm\")--"; + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + +TEST_CASE("Tier-3 push-down: SQLite rejects empty field for non-COUNT") { + SqliteFixture fx; + fx.open(); + + UNSIGNED8 forc[] = ""; + UNSIGNED8 spec[] = "SUM:"; // empty field, non-COUNT + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + TEST_CASE("Tier-3 push-down: untranslatable FOR declines (caller falls back)") { SqliteFixture fx; fx.open(); diff --git a/tests/unit/abi_aggregate_test.cpp b/tests/unit/abi_aggregate_test.cpp index eec4c582..1e76cbd7 100644 --- a/tests/unit/abi_aggregate_test.cpp +++ b/tests/unit/abi_aggregate_test.cpp @@ -171,6 +171,45 @@ TEST_CASE("AdsAggregate remote wire: COUNT/SUM/AVG/MIN/MAX over a FOR predicate" CHECK(AdsAggregateCount(hRes, &dummy) != AE_SUCCESS); // handle freed } +// ── 4. Remote wire: an unknown field must be rejected, not silently counted ── +// Regression for the Tier-3 review (#113): the server resolved each spec's +// field with field_index(), which returns -1 for a name that isn't in the +// table -- and the scan loop treated fidx < 0 as COUNT(*). So `SUM:NOPE` +// silently returned the *row count* as a numeric total with AE_SUCCESS, +// masking a typo'd or injected field name. It must fail instead. +TEST_CASE("AdsAggregate remote wire: unknown field is rejected, not silent COUNT") { + agg_wipe(); + auto dir = agg_tmp_dir(); + seed_agg_fixture(dir); + + RemoteFixture fx; + fx.open(dir); + + UNSIGNED8 forc[] = ""; + UNSIGNED8 spec[] = "SUM:NOPE"; // NOPE is not a column of agg.dbf + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + +// ── 5. Remote wire: an empty field is only valid for COUNT ─────────────────── +// `SUM:` / `AVG:` / `MIN:` / `MAX:` with no field used to fall through the same +// fidx < 0 path and be folded as COUNT(*). Only COUNT may omit the field. +TEST_CASE("AdsAggregate remote wire: empty field for non-COUNT is rejected") { + agg_wipe(); + auto dir = agg_tmp_dir(); + seed_agg_fixture(dir); + + RemoteFixture fx; + fx.open(dir); + + UNSIGNED8 forc[] = ""; + UNSIGNED8 spec[] = "SUM:"; // empty field, non-COUNT + ADSHANDLE hRes = 0; + CHECK(AdsAggregate(fx.hTable, forc, spec, &hRes) != AE_SUCCESS); + CHECK(hRes == 0); +} + // ── 3. Remote wire: zero matches ───────────────────────────────────────────── TEST_CASE("AdsAggregate remote wire: zero matches -> 0 / 0 / empty") { agg_wipe(); From 5dc1c0848e586807f2bbbbf6cb3d0c64d14c76ee Mon Sep 17 00:00:00 2001 From: Admnwk <220553748+Admnwk@users.noreply.github.com> Date: Fri, 26 Jun 2026 13:20:11 -0300 Subject: [PATCH 2/3] fix(test): include in abi_aggregate_sqlite_test The fixture uses std::vector but relied on a transitive include that MSVC provides and clang/libc++ does not, breaking the Linux/macOS CI. Include explicitly. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/unit/abi_aggregate_sqlite_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/abi_aggregate_sqlite_test.cpp b/tests/unit/abi_aggregate_sqlite_test.cpp index 01d8499e..2fa9b9a2 100644 --- a/tests/unit/abi_aggregate_sqlite_test.cpp +++ b/tests/unit/abi_aggregate_sqlite_test.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace fs = std::filesystem; From ed6117dafb08b2bb473c07639032a29421c45362 Mon Sep 17 00:00:00 2001 From: Admnwk <220553748+Admnwk@users.noreply.github.com> Date: Fri, 26 Jun 2026 13:36:34 -0300 Subject: [PATCH 3/3] fix(index): prevent INDEX ON from corrupting source table indexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M13.1 — INDEX ON (SELECT ...) would open the source table file instead of using the materialized cursor, causing the source table's indexes (.cdx) to be rewritten (destructive). Add validation: reject INDEX ON if table name is a SELECT result or parenthesized expression. Enforce that INDEX ON uses a simple table name/identifier, not a cursor result. Error message guides users to materialize the SELECT first using ORDER BY/DISTINCT/LIMIT before creating an index. Fixes: INDEX ON (SELECT * FROM t) no longer corrupts t's .cdx. Co-Authored-By: Claude Haiku 4.5 --- src/abi/ace_exports.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/abi/ace_exports.cpp b/src/abi/ace_exports.cpp index 048dbaf0..ca41cda5 100644 --- a/src/abi/ace_exports.cpp +++ b/src/abi/ace_exports.cpp @@ -13555,6 +13555,18 @@ UNSIGNED32 AdsExecuteSQLDirect(ADSHANDLE hStatement, UNSIGNED8* pucSQL, if (static_cast(p) == c) conn_h = h; }); if (conn_h == 0) return fail(openads::AE_INVALID_CONNECTION_HANDLE, ""); + + // M13.1 — validate table name is not a SELECT result. + // INDEX ON (SELECT ...) would open the source table file instead + // of using the materialized cursor → corrupts the source indexes. + // Enforce that table name is a simple identifier/filename. + const auto& tname = ci.value().table; + if (tname.empty() || tname[0] == '(' || tname.find("SELECT") != std::string::npos) { + return fail(openads::AE_SYNTAX_ERROR, + "INDEX ON requires a table name, not a SELECT result; " + "use SELECT ... ORDER BY/DISTINCT/LIMIT to materialize first"); + } + std::vector name_buf(ci.value().table.size() + 1, 0); std::memcpy(name_buf.data(), ci.value().table.data(), ci.value().table.size());