diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c79a37..0bc8477 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.16) -project(tidesdb_cpp VERSION 2.5.2 LANGUAGES CXX) +project(tidesdb_cpp VERSION 2.5.3 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/include/tidesdb/tidesdb.hpp b/include/tidesdb/tidesdb.hpp index 71ab606..050f06c 100644 --- a/include/tidesdb/tidesdb.hpp +++ b/include/tidesdb/tidesdb.hpp @@ -115,7 +115,8 @@ enum class ErrorCode Unknown = TDB_ERR_UNKNOWN, Locked = TDB_ERR_LOCKED, Readonly = TDB_ERR_READONLY, - Busy = TDB_ERR_BUSY + Busy = TDB_ERR_BUSY, + Precondition = TDB_ERR_PRECONDITION }; /** @@ -166,6 +167,8 @@ class Exception : public std::runtime_error return "database is read-only"; case TDB_ERR_BUSY: return "database is busy"; + case TDB_ERR_PRECONDITION: + return "precondition failed"; default: return "unknown error"; } @@ -459,6 +462,9 @@ struct Config std::uint64_t unifiedMemtableSyncIntervalUs = 0; // Sync interval for unified WAL int maxConcurrentFlushes = 0; // Global cap on in-flight memtable flushes across all CFs (0 = library default) + bool finishCompactionsOnClose = + false; // false = cancel in-flight compactions at their next checkpoint for a fast + // shutdown (no data loss); true = let them run to completion before close returns tidesdb_objstore_t* objectStore = nullptr; // Pluggable object store connector (nullptr = local only) std::optional @@ -541,6 +547,12 @@ struct DbStats std::uint64_t totalUploads = 0; std::uint64_t totalUploadFailures = 0; bool replicaMode = false; + // Single-writer fencing (object-store mode). primaryEpoch is the lease epoch this primary + // currently holds (0 when not a primary / no lease); seenEpoch is the highest lease epoch a + // replica has observed. A promotion that took bumps primaryEpoch; a fenced primary sees + // replicaMode flip back to true. + std::uint64_t primaryEpoch = 0; + std::uint64_t seenEpoch = 0; // Write-amplification counters (lifetime since open, on-disk framed bytes). uwalBytesWritten // is the shared unified WAL volume (zero when unified mode is off); the remaining fields are // summed across all column families. db-wide WA = (uwal + wal + flush + compaction) / user diff --git a/src/tidesdb.cpp b/src/tidesdb.cpp index 6af6a3e..c09dcb0 100644 --- a/src/tidesdb.cpp +++ b/src/tidesdb.cpp @@ -749,6 +749,7 @@ TidesDB::TidesDB(const Config& config) cConfig.unified_memtable_sync_mode = static_cast(config.unifiedMemtableSyncMode); cConfig.unified_memtable_sync_interval_us = config.unifiedMemtableSyncIntervalUs; cConfig.max_concurrent_flushes = config.maxConcurrentFlushes; + cConfig.finish_compactions_on_close = config.finishCompactionsOnClose ? 1 : 0; cConfig.object_store = config.objectStore; tidesdb_objstore_config_t osCfg; @@ -961,6 +962,8 @@ DbStats TidesDB::getDbStats() stats.totalUploads = cStats.total_uploads; stats.totalUploadFailures = cStats.total_upload_failures; stats.replicaMode = cStats.replica_mode != 0; + stats.primaryEpoch = cStats.primary_epoch; + stats.seenEpoch = cStats.seen_epoch; stats.uwalBytesWritten = cStats.uwal_bytes_written; stats.walBytesWritten = cStats.wal_bytes_written; @@ -1094,6 +1097,7 @@ Config TidesDB::defaultConfig() config.unifiedMemtableSyncMode = static_cast(cConfig.unified_memtable_sync_mode); config.unifiedMemtableSyncIntervalUs = cConfig.unified_memtable_sync_interval_us; config.maxConcurrentFlushes = cConfig.max_concurrent_flushes; + config.finishCompactionsOnClose = cConfig.finish_compactions_on_close != 0; config.objectStore = nullptr; config.objectStoreConfig = std::nullopt; diff --git a/tests/tidesdb_test.cpp b/tests/tidesdb_test.cpp index 12338b6..138d0c9 100644 --- a/tests/tidesdb_test.cpp +++ b/tests/tidesdb_test.cpp @@ -1648,6 +1648,10 @@ TEST_F(TidesDBTest, DbStatsUnifiedFields) ASSERT_TRUE(dbStats.unifiedMemtableEnabled); ASSERT_FALSE(dbStats.objectStoreEnabled); ASSERT_FALSE(dbStats.replicaMode); + + // Single-writer fencing epochs are reported and zero when not in object-store mode + ASSERT_EQ(dbStats.primaryEpoch, 0u); + ASSERT_EQ(dbStats.seenEpoch, 0u); } TEST_F(TidesDBTest, ErrorCodeReadonly) @@ -1661,6 +1665,50 @@ TEST_F(TidesDBTest, ErrorCodeReadonly) ASSERT_EQ(msg, "database is read-only"); } +TEST_F(TidesDBTest, ErrorCodePrecondition) +{ + // Verify the Precondition error code maps correctly + ASSERT_EQ(static_cast(tidesdb::ErrorCode::Precondition), TDB_ERR_PRECONDITION); + ASSERT_EQ(static_cast(tidesdb::ErrorCode::Precondition), -15); + + // Verify error message + std::string msg = tidesdb::Exception::errorMessage(TDB_ERR_PRECONDITION); + ASSERT_EQ(msg, "precondition failed"); +} + +TEST_F(TidesDBTest, FinishCompactionsOnClose) +{ + // Default should leave the fast-shutdown behavior (cancel in-flight compactions) + auto defaultConfig = tidesdb::TidesDB::defaultConfig(); + ASSERT_FALSE(defaultConfig.finishCompactionsOnClose); + + // Opening with the flag enabled must succeed; close runs in-flight compactions to completion + tidesdb::Config config = getConfig(); + config.finishCompactionsOnClose = true; + + tidesdb::TidesDB db(config); + + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + db.createColumnFamily("test_cf", cfConfig); + auto cf = db.getColumnFamily("test_cf"); + + { + auto txn = db.beginTransaction(); + for (int i = 0; i < 50; ++i) + { + txn.put(cf, "key_" + std::to_string(i), "value_" + std::to_string(i), -1); + } + txn.commit(); + } + + auto value = [&] + { + auto txn = db.beginTransaction(); + return txn.get(cf, "key_0"); + }(); + ASSERT_FALSE(value.empty()); +} + TEST_F(TidesDBTest, DefaultConfigUnifiedFields) { auto defaultConfig = tidesdb::TidesDB::defaultConfig(); @@ -1982,6 +2030,20 @@ TEST_F(TidesDBTest, BuiltInComparators) { tidesdb::TidesDB db(getConfig()); + // The "lexicographic" comparator is strcmp-based and treats keys as NUL-terminated C + // strings (it ignores the key sizes), so keys handed to it MUST carry a trailing '\0'; + // otherwise strcmp reads past the stored key into adjacent memory, producing + // non-deterministic ordering and flaky lookups. Build NUL-terminated key bytes here -- the + // trailing '\0' is harmless for the size-bounded comparators (memcmp, reverse, + // case_insensitive), which just treat it as an ordinary final byte present in both the + // stored key and the lookup key. + auto nulTerminatedKey = [](const std::string& s) + { + std::vector bytes(s.begin(), s.end()); + bytes.push_back('\0'); + return bytes; + }; + for (const std::string& name : {std::string("memcmp"), std::string("lexicographic"), std::string("reverse"), std::string("case_insensitive")}) { @@ -1990,16 +2052,20 @@ TEST_F(TidesDBTest, BuiltInComparators) db.createColumnFamily("cmp_" + name, cfConfig); auto cf = db.getColumnFamily("cmp_" + name); + const auto alphaKey = nulTerminatedKey("alpha"); + const auto betaKey = nulTerminatedKey("beta"); + const std::vector oneValue{'1'}; + const std::vector twoValue{'2'}; { auto txn = db.beginTransaction(); - txn.put(cf, "alpha", "1", -1); - txn.put(cf, "beta", "2", -1); + txn.put(cf, alphaKey, oneValue, -1); + txn.put(cf, betaKey, twoValue, -1); txn.commit(); } auto txn = db.beginTransaction(); - auto value = txn.get(cf, "alpha"); - ASSERT_EQ(std::string(value.begin(), value.end()), "1"); + auto value = txn.get(cf, alphaKey); + ASSERT_EQ(std::string(value.begin(), value.end()), "1") << "comparator: " << name; auto stats = cf.getStats(); if (stats.config.has_value())