Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/engine/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,41 @@ void Table::set_recno_sequence(std::vector<std::uint32_t> seq) {
sequence_idx_ = -1;
}

void Table::rebuild_aof_sequence_() {
if (!aof_active_ || driver_ == nullptr) return;
std::vector<std::uint32_t> seq;
if (order_ && order_->index()) {
// Walk the active index in traversal order, keeping only rows that are
// both inside the active scope and AOF matches, so the sparse walk
// follows the index order AND honours the scope (the sequence path
// does not re-check the scope per step).
auto* idx = order_->index();
idx->invalidate_cursor();
const bool desc = order_->descending_traverse();
auto r = desc ? idx->seek_last() : idx->seek_first();
const std::uint32_t guard_max = driver_->record_count() + 1;
std::uint32_t guard = 0;
while (r && r.value().positioned && guard++ <= guard_max) {
const std::string key = idx->current_key();
// Stop once we cross the far scope boundary in the walk direction;
// every later key in this direction is also out of scope.
if (desc) { if (!key_in_top_scope_(key)) break; }
else { if (!key_in_bottom_scope_(key)) break; }
const std::uint32_t rc = r.value().recno;
if (key_in_top_scope_(key) && key_in_bottom_scope_(key) &&
rc >= 1 && rc <= aof_bitmap_.size() && aof_bitmap_[rc - 1])
seq.push_back(rc);
r = desc ? idx->prev() : idx->next();
}
idx->invalidate_cursor();
} else {
seq.reserve(aof_bitmap_.size() / 4);
for (std::size_t i = 0; i < aof_bitmap_.size(); ++i)
if (aof_bitmap_[i]) seq.push_back(static_cast<std::uint32_t>(i + 1));
}
set_recno_sequence(std::move(seq));
}
Comment on lines +271 to +304

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Respect Active Index Scopes in AOF Sequence Rebuild

The current implementation of rebuild_aof_sequence_ walks the entire index from the beginning (seek_first) to the end (seek_last), completely ignoring any active index scopes (order_->scope().top and order_->scope().bottom). This causes any active scope to be bypassed when an AOF is active, resulting in records outside the scope being returned.

To fix this correctness issue:

  1. Respect the scope boundaries by starting the traversal at the appropriate scope boundary (e.g., seek_key with the top scope for ascending traversal, or bottom scope for descending traversal).
  2. Terminate the traversal early once the keys go past the active scope boundaries.
  3. Ensure defensive programming by checking if driver_ is valid before dereferencing it.

Note: Since changing the scope also affects the visible set of records, you should also update set_scope, clear_scope, and clear_scopes to call rebuild_aof_sequence_() so that the cached sequence is correctly rebuilt when scopes change.

void Table::rebuild_aof_sequence_() {
    if (!aof_active_ || !driver_) return;
    std::vector<std::uint32_t> seq;
    if (order_ && order_->index()) {
        // Walk the active index in traversal order, keeping only AOF matches,
        // so the sparse walk follows the index (not recno) order.
        auto* idx = order_->index();
        idx->invalidate_cursor();
        const bool desc = order_->descending_traverse();
        
        util::Result<drivers::SeekOutcome> r = drivers::SeekOutcome{};
        if (desc) {
            if (order_->scope().bottom.has_value()) {
                r = idx->seek_key(*order_->scope().bottom, true);
                if (r && !r.value().positioned) {
                    r = idx->seek_last();
                }
            } else {
                r = idx->seek_last();
            }
        } else {
            if (order_->scope().top.has_value()) {
                r = idx->seek_key(*order_->scope().top, true);
            } else {
                r = idx->seek_first();
            }
        }

        const std::uint32_t guard_max = driver_->record_count() + 1;
        std::uint32_t guard = 0;
        while (r && r.value().positioned && guard++ <= guard_max) {
            const std::string key = idx->current_key();
            if (!desc && !key_in_bottom_scope_(key)) {
                break; // Past the bottom scope in ascending order
            }
            if (desc && !key_in_top_scope_(key)) {
                break; // Past the top scope in descending order
            }
            if (key_in_top_scope_(key) && key_in_bottom_scope_(key)) {
                const std::uint32_t rc = r.value().recno;
                if (rc >= 1 && rc <= aof_bitmap_.size() && aof_bitmap_[rc - 1])
                    seq.push_back(rc);
            }
            r = desc ? idx->prev() : idx->next();
        }
        idx->invalidate_cursor();
    } else {
        seq.reserve(aof_bitmap_.size() / 4);
        for (std::size_t i = 0; i < aof_bitmap_.size(); ++i)
            if (aof_bitmap_[i]) seq.push_back(static_cast<std::uint32_t>(i + 1));
    }
    set_recno_sequence(std::move(seq));
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 90d1e96. rebuild_aof_sequence_ now filters rows by key_in_top_scope_/key_in_bottom_scope_ and breaks at the far boundary in the walk direction, guards driver_, and set_scope/clear_scope/clear_scopes rebuild the sequence. Added a regression test (AOF + index scope). I kept a full traversal with a per-row scope check + far-boundary break rather than a scope-boundary seek, to avoid soft-seek edge cases; happy to switch to the boundary seek if you prefer the O(scope) start.


util::Result<void> Table::goto_top() {
// Absolute reposition: drop any read-ahead block so we observe
// writes made through another handle and start the scan fresh.
Expand Down Expand Up @@ -1253,10 +1288,14 @@ void Table::set_order(std::unique_ptr<drivers::IIndex> idx) {
bool desc = idx && idx->descending();
order_.emplace(std::move(idx));
if (desc) order_->set_descending_traverse(true);
// Keep any active AOF sparse sequence aligned to the new order so a
// filtered walk follows it (no-op when no AOF is active).
rebuild_aof_sequence_();
}

void Table::clear_order() {
order_.reset();
rebuild_aof_sequence_();
}

std::unique_ptr<drivers::IIndex> Table::take_order() {
Expand Down Expand Up @@ -1411,20 +1450,23 @@ util::Result<void> Table::set_scope(bool top, const std::string& key) {
}
if (top) order_->scope().top = key;
else order_->scope().bottom = key;
rebuild_aof_sequence_(); // keep any AOF sparse set inside the new scope
return {};
}

util::Result<void> Table::clear_scope(bool top) {
if (!order_) return {};
if (top) order_->scope().top.reset();
else order_->scope().bottom.reset();
rebuild_aof_sequence_();
return {};
}

util::Result<void> Table::clear_scopes() {
if (!order_) return {};
order_->scope().top.reset();
order_->scope().bottom.reset();
rebuild_aof_sequence_();
return {};
}

Expand Down
26 changes: 12 additions & 14 deletions src/engine/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,20 +285,12 @@ class Table {
if (r == 0 || r > p->size()) return false;
return static_cast<bool>((*p)[r - 1]);
};
// M-AOF.5 — sparse-bitmap navigation. Translate the bitmap
// into the sorted recno sequence Table already uses to walk
// a precomputed visible set; goto_top / goto_bottom / skip
// then jump O(1) per step instead of iterating every recno
// checking the predicate. Result: Skip-through-visible-set
// becomes O(M) where M is the number of matching records,
// which is where the textbook Rushmore "10-100x" total
// speedup actually shows up.
std::vector<std::uint32_t> seq;
seq.reserve(p->size() / 4); // typical AOF selectivities
for (std::size_t i = 0; i < p->size(); ++i) {
if ((*p)[i]) seq.push_back(static_cast<std::uint32_t>(i + 1));
}
set_recno_sequence(std::move(seq));
// M-AOF.5 — sparse-bitmap navigation. Translate the bitmap into the
// recno sequence Table already uses to walk a precomputed visible set
// so goto_top / goto_bottom / skip jump O(1) per step. The sequence is
// built in the ACTIVE INDEX order (recno order when no order is set) so
// an ordered walk under a filter follows the index, not recno order.
rebuild_aof_sequence_();
}
bool aof_active() const noexcept { return aof_active_; }

Expand Down Expand Up @@ -412,6 +404,12 @@ class Table {
bool key_in_top_scope_ (const std::string& key) const;
bool key_in_bottom_scope_(const std::string& key) const;

// Rebuild the AOF sparse recno sequence (no-op when no AOF is active):
// in the active index order when an order is set, else recno order. Called
// on install_aof_bitmap and whenever the active order changes so the
// filtered walk always follows the active order.
void rebuild_aof_sequence_();

TableTypeForLock to_lock_type_() const noexcept;

std::unique_ptr<drivers::IDriver> driver_;
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ add_executable(openads_unit_tests
unit/abi_aggregate_test.cpp
unit/aof_eval_test.cpp
unit/abi_aof_test.cpp
unit/abi_aof_order_walk_test.cpp
unit/aes_test.cpp
unit/dbt_memo_test.cpp
unit/fpt_memo_test.cpp
Expand Down
174 changes: 174 additions & 0 deletions tests/unit/abi_aof_order_walk_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Regression: an ordered walk with an active AOF/filter must follow the
// ACTIVE INDEX order, not recno order. The qa-diff harness (rddads vs native
// DBFCDX) flagged "filter AGE>=30" walking out of order under OpenADS; this
// confirms it at the ABI (no rddads) so it is pinned as an engine fix.
// Root cause: install_aof_bitmap built the sparse recno_sequence_ in recno
// order, which goto_top/skip prefer over the active index order.
#include "doctest.h"
#include "openads/ace.h"

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

namespace {
fs::path stage(const fs::path& dir) {
fs::create_directories(dir);
auto p = dir / "data.dbf";
std::vector<std::uint8_t> file;
auto push = [&](const void* d, std::size_t n) {
const auto* b = static_cast<const std::uint8_t*>(d);
file.insert(file.end(), b, b + n);
};
const std::uint16_t rec_len = 1 + 10 + 3;
const std::uint16_t hdr_len = 32 + 32 + 32 + 1;
std::array<std::uint8_t, 32> hdr{};
hdr[0] = 0x03; hdr[4] = 6;
hdr[8] = hdr_len & 0xFF; hdr[9] = (hdr_len >> 8) & 0xFF;
hdr[10] = rec_len & 0xFF; hdr[11] = (rec_len >> 8) & 0xFF;
push(hdr.data(), hdr.size());
std::array<std::uint8_t, 32> f1{};
std::strncpy(reinterpret_cast<char*>(f1.data()), "NAME", 11);
f1[11] = 'C'; f1[16] = 10;
push(f1.data(), f1.size());
std::array<std::uint8_t, 32> f2{};
std::strncpy(reinterpret_cast<char*>(f2.data()), "AGE", 11);
f2[11] = 'N'; f2[16] = 3; f2[17] = 0;
push(f2.data(), f2.size());
file.push_back(0x0D);
auto rec = [&](const char* nm, int age) {
file.push_back(' ');
std::string n = nm; n.resize(10, ' '); push(n.data(), n.size());
char a[4]; std::snprintf(a, sizeof(a), "%3d", age); push(a, 3);
};
rec("r1", 40); rec("r2", 30); rec("r3", 45);
rec("r4", 28); rec("r5", 35); rec("r6", 31);
file.push_back(0x1A);
std::ofstream(p, std::ios::binary).write(
reinterpret_cast<const char*>(file.data()),
static_cast<std::streamsize>(file.size()));
return p;
}
int age_now(ADSHANDLE hT) {
UNSIGNED8 fld[8]; std::memcpy(fld, "AGE", 4);
UNSIGNED8 buf[16] = {0}; UNSIGNED32 cap = sizeof(buf);
REQUIRE(AdsGetField(hT, fld, buf, &cap, 0) == 0);
return std::atoi(reinterpret_cast<char*>(buf));
}
std::string name_now(ADSHANDLE hT) {
UNSIGNED8 fld[8]; std::memcpy(fld, "NAME", 5);
UNSIGNED8 buf[32] = {0}; UNSIGNED32 cap = sizeof(buf);
REQUIRE(AdsGetField(hT, fld, buf, &cap, 0) == 0);
std::string s(reinterpret_cast<char*>(buf), cap);
while (!s.empty() && s.back() == ' ') s.pop_back();
return s;
}
std::vector<std::string> walk_names(ADSHANDLE hT) {
std::vector<std::string> got;
REQUIRE(AdsGotoTop(hT) == 0);
UNSIGNED16 eof = 0;
REQUIRE(AdsAtEOF(hT, &eof) == 0);
while (eof == 0 && got.size() < 50) {
got.push_back(name_now(hT));
REQUIRE(AdsSkip(hT, 1) == 0);
REQUIRE(AdsAtEOF(hT, &eof) == 0);
}
return got;
}
ADSHANDLE connect(const fs::path& dir) {
std::string srv = dir.string();
ADSHANDLE hConn = 0;
REQUIRE(AdsConnect60(reinterpret_cast<UNSIGNED8*>(srv.data()),
ADS_LOCAL_SERVER, nullptr, nullptr, 0, &hConn) == 0);
return hConn;
}
std::vector<int> walk_ages(ADSHANDLE hT) {
std::vector<int> got;
REQUIRE(AdsGotoTop(hT) == 0);
UNSIGNED16 eof = 0;
REQUIRE(AdsAtEOF(hT, &eof) == 0);
while (eof == 0 && got.size() < 50) {
got.push_back(age_now(hT));
REQUIRE(AdsSkip(hT, 1) == 0);
REQUIRE(AdsAtEOF(hT, &eof) == 0);
}
return got;
}
} // namespace

TEST_CASE("ABI: ordered walk under an active AOF keeps the index order") {
auto dir = fs::temp_directory_path() / "openads_aof_order_walk";
std::error_code ec; fs::remove_all(dir, ec);
stage(dir);

ADSHANDLE hConn = connect(dir);
ADSHANDLE hT = 0;
UNSIGNED8 nm[16] = "data";
REQUIRE(AdsOpenTable(hConn, nm, nm, ADS_CDX, 1, 1, 0, 1, &hT) == 0);

ADSHANDLE hIdx = 0;
REQUIRE(AdsCreateIndex61(hT, (UNSIGNED8*)"data", (UNSIGNED8*)"TAGE",
(UNSIGNED8*)"AGE", nullptr, nullptr, 0, 512, &hIdx) == 0);

// Filter AGE>=30 (28 fails). With TAGE active the walk must be ascending
// by AGE, exactly like native DBFCDX.
std::string cond = "AGE >= 30";
REQUIRE(AdsSetAOF(hT, (UNSIGNED8*)cond.data(), 0) == 0);
CHECK(walk_ages(hT) == std::vector<int>{30, 31, 35, 40, 45});

// Re-applying the AOF must also stay ordered.
REQUIRE(AdsClearAOF(hT) == 0);
REQUIRE(AdsSetAOF(hT, (UNSIGNED8*)cond.data(), 0) == 0);
CHECK(walk_ages(hT) == std::vector<int>{30, 31, 35, 40, 45});

// Clearing the AOF restores the full table in index order.
REQUIRE(AdsClearAOF(hT) == 0);
CHECK(walk_ages(hT) == std::vector<int>{28, 30, 31, 35, 40, 45});

REQUIRE(AdsCloseTable(hT) == 0);
REQUIRE(AdsDisconnect(hConn) == 0);
fs::remove_all(dir, ec);
}

// An AOF combined with an index SCOPE must yield only rows that satisfy BOTH,
// in index order. The sparse AOF sequence is walked without re-checking the
// scope, so the rebuild must exclude out-of-scope rows up front.
TEST_CASE("ABI: AOF + index scope yields only in-scope matching rows in order") {
auto dir = fs::temp_directory_path() / "openads_aof_scope_walk";
std::error_code ec; fs::remove_all(dir, ec);
stage(dir); // names r1..r6, ages 40,30,45,28,35,31

ADSHANDLE hConn = connect(dir);
ADSHANDLE hT = 0;
UNSIGNED8 nm[16] = "data";
REQUIRE(AdsOpenTable(hConn, nm, nm, ADS_CDX, 1, 1, 0, 1, &hT) == 0);

ADSHANDLE hIdx = 0;
REQUIRE(AdsCreateIndex61(hT, (UNSIGNED8*)"data", (UNSIGNED8*)"TNAME",
(UNSIGNED8*)"NAME", nullptr, nullptr, 0, 512, &hIdx) == 0);

// Scope NAME in [r2 .. r5] -> r2,r3,r4,r5 are in scope. Keys are passed at
// the full index width (NAME is C(10)) so the boundary compare is exact.
UNSIGNED8 top[16] = "r2 ", bot[16] = "r5 ";
REQUIRE(AdsSetScope(hIdx, ADS_TOP, top, 10, ADS_STRINGKEY) == 0);
REQUIRE(AdsSetScope(hIdx, ADS_BOTTOM, bot, 10, ADS_STRINGKEY) == 0);

// AOF AGE>=30 drops r4 (28). In-scope AND matching: r2,r3,r5 (NAME order).
std::string cond = "AGE >= 30";
REQUIRE(AdsSetAOF(hT, (UNSIGNED8*)cond.data(), 0) == 0);

CHECK(walk_names(hT) ==
std::vector<std::string>{"r2", "r3", "r5"});

REQUIRE(AdsCloseTable(hT) == 0);
REQUIRE(AdsDisconnect(hConn) == 0);
fs::remove_all(dir, ec);
}
Loading