Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9860aea
fix(storage): ensure BufferPoolManager flushes all dirty pages on des…
poyrazK Mar 8, 2026
3e8ac2a
fix(storage): resolve 1024-row bug by simplifying page space validati…
poyrazK Mar 8, 2026
62cd6a2
test(e2e): enhance Python client robustness and fix binary name in or…
poyrazK Mar 8, 2026
8255d84
docs: add Stability & Testing Refinement to the roadmap
poyrazK Mar 8, 2026
b3cc142
chore: cleanup reproduction test and revert build changes
poyrazK Mar 8, 2026
5ef2128
chore: ignore heap files and remove them from index
poyrazK Mar 8, 2026
349af8e
style: automated clang-format fixes
poyrazK Mar 8, 2026
d69635c
fix(storage): harden BufferPoolManager destructor with exception safety
poyrazK Mar 8, 2026
f409e85
chore(storage): remove debug logging and dead code from HeapTable
poyrazK Mar 8, 2026
5bfe0ae
test(e2e): improve client reliability and ensure background process c…
poyrazK Mar 8, 2026
c6b8eb2
docs: formalize Phase 9 roadmap and reorganize gitignore categories
poyrazK Mar 8, 2026
b474388
style: automated clang-format fixes
poyrazK Mar 8, 2026
4f35ce6
feat(parser,executor): implement HAVING clause and expand test coverage
poyrazK Mar 8, 2026
355e4ea
style: automated clang-format fixes
poyrazK Mar 8, 2026
878d551
test: expand operator and parser coverage with advanced test cases
poyrazK Mar 8, 2026
bd6f8df
style: automated clang-format fixes
poyrazK Mar 8, 2026
88872b4
test: refine MVCC visibility and expand vectorized operator tests
poyrazK Mar 8, 2026
821ffa6
style: automated clang-format fixes
poyrazK Mar 8, 2026
e2388b2
test: major coverage boost for network, recovery, and executor compon…
poyrazK Mar 8, 2026
2991320
style: automated clang-format fixes
poyrazK Mar 8, 2026
a1f186a
test: achieve significant coverage boost for network and recovery
poyrazK Mar 8, 2026
51b82e4
style: automated clang-format fixes
poyrazK Mar 8, 2026
a5b64a8
test: reach for 70% coverage with exhaustive parser and network edge …
poyrazK Mar 8, 2026
17447c9
parser: implement CreateIndexStatement AST and parse_create_index
poyrazK Mar 9, 2026
5f5eb18
tests: harden cloudSQL execution and visibility tests
poyrazK Mar 9, 2026
b09bdce
tests: resolve raft simulation and networking race conditions
poyrazK Mar 9, 2026
a77dc20
tests: enhance recovery log validation and server handshake reliability
poyrazK Mar 9, 2026
5cd90ae
style: automated clang-format fixes
poyrazK Mar 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ coverage/
*.orig
*.rej

# Storage Files
# ==============
*.heap

# ==============
# Emacs
# ==============
Expand Down
7 changes: 7 additions & 0 deletions docs/phases/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ This directory contains the technical documentation for the lifecycle of the clo
- Batch-at-a-time vectorized execution model (Scan, Filter, Project, Aggregate).
- High-performance `NumericVector` and `VectorBatch` data structures.

### Phase 9 — Stability & Testing Refinement
**Focus**: Engine Robustness & E2E Validation.
- Slotted-page layout fixes for large table support.
- Buffer Pool Manager lifecycle management (destructor flushing).
- Robust Python E2E client with partial-read handling and numeric validation.
- Standardized test orchestration via `run_test.sh`.

---

## Technical Standards
Expand Down
1 change: 1 addition & 0 deletions include/parser/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class Parser {

std::unique_ptr<Statement> parse_select();
std::unique_ptr<Statement> parse_create_table();
std::unique_ptr<Statement> parse_create_index();
std::unique_ptr<Statement> parse_insert();
std::unique_ptr<Statement> parse_update();
std::unique_ptr<Statement> parse_delete();
Expand Down
37 changes: 37 additions & 0 deletions include/parser/statement.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,43 @@ class CreateTableStatement : public Statement {
[[nodiscard]] std::string to_string() const override;
};

/**
* @brief CREATE INDEX statement
*/
class CreateIndexStatement : public Statement {
private:
std::string index_name_;
std::string table_name_;
std::vector<std::string> columns_;
bool unique_ = false;

public:
CreateIndexStatement() = default;

[[nodiscard]] StmtType type() const override { return StmtType::CreateIndex; }

void set_index_name(std::string name) { index_name_ = std::move(name); }
void set_table_name(std::string name) { table_name_ = std::move(name); }
void add_column(std::string col) { columns_.push_back(std::move(col)); }
void set_unique(bool unique) { unique_ = unique; }

[[nodiscard]] const std::string& index_name() const { return index_name_; }
[[nodiscard]] const std::string& table_name() const { return table_name_; }
[[nodiscard]] const std::vector<std::string>& columns() const { return columns_; }
[[nodiscard]] bool unique() const { return unique_; }

[[nodiscard]] std::string to_string() const override {
std::string s = "CREATE ";
if (unique_) s += "UNIQUE ";
s += "INDEX " + index_name_ + " ON " + table_name_ + " (";
for (size_t i = 0; i < columns_.size(); ++i) {
s += columns_[i] + (i == columns_.size() - 1 ? "" : ", ");
}
s += ")";
return s;
}
};

/**
* @brief DROP TABLE statement
*/
Expand Down
6 changes: 6 additions & 0 deletions src/executor/query_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,12 @@ std::unique_ptr<Operator> QueryExecutor::build_plan(const parser::SelectStatemen
}
current_root = std::make_unique<AggregateOperator>(std::move(current_root),
std::move(group_by), std::move(aggs));

/* 3.5. Having */
if (stmt.having()) {
current_root =
std::make_unique<FilterOperator>(std::move(current_root), stmt.having()->clone());
}
}

/* 4. Sort (ORDER BY) */
Expand Down
3 changes: 2 additions & 1 deletion src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ std::map<std::string, TokenType> Lexer::init_keywords() {
{"CHAR", TokenType::TypeChar},
{"BOOL", TokenType::TypeBool},
{"BOOLEAN", TokenType::TypeBool},
{"DISTINCT", TokenType::Distinct}};
{"DISTINCT", TokenType::Distinct},
{"HAVING", TokenType::Having}};
}

Token Lexer::next_token() {
Expand Down
59 changes: 59 additions & 0 deletions src/parser/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ std::unique_ptr<Statement> Parser::parse_statement() {
static_cast<void>(next_token()); // consume CREATE
if (peek_token().type() == TokenType::Table) {
stmt = parse_create_table();
} else if (peek_token().type() == TokenType::Index ||
peek_token().type() == TokenType::Unique) {
stmt = parse_create_index();
}
break;
case TokenType::Insert:
Expand Down Expand Up @@ -341,6 +344,62 @@ std::unique_ptr<Statement> Parser::parse_create_table() {
return stmt;
}

/**
* @brief Parse CREATE INDEX statement
*/
std::unique_ptr<Statement> Parser::parse_create_index() {
auto stmt = std::make_unique<CreateIndexStatement>();
if (consume(TokenType::Unique)) {
stmt->set_unique(true);
}
if (!consume(TokenType::Index)) {
return nullptr;
}

const Token name = next_token();
if (name.type() != TokenType::Identifier) {
return nullptr;
}
stmt->set_index_name(name.lexeme());

if (!consume(TokenType::On)) {
return nullptr;
}

const Token table_name = next_token();
if (table_name.type() != TokenType::Identifier) {
return nullptr;
}
stmt->set_table_name(table_name.lexeme());

if (!consume(TokenType::LParen)) {
return nullptr;
}

bool first = true;
while (true) {
if (!first && !consume(TokenType::Comma)) {
break;
}
first = false;

const Token col_name = next_token();
if (col_name.type() != TokenType::Identifier) {
return nullptr;
}
stmt->add_column(col_name.lexeme());

if (peek_token().type() == TokenType::RParen) {
break;
}
}

if (!consume(TokenType::RParen)) {
return nullptr;
}
return stmt;
}

/**
* @brief Parse INSERT statement
*/
Expand Down
21 changes: 19 additions & 2 deletions src/storage/buffer_pool_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
Expand All @@ -28,7 +29,19 @@ BufferPoolManager::BufferPoolManager(size_t pool_size, StorageManager& storage_m
}
}

BufferPoolManager::~BufferPoolManager() = default;
BufferPoolManager::~BufferPoolManager() {
try {
flush_all_pages();
} catch (const std::exception& e) {
// Log error to stderr; avoid throwing from destructor to prevent std::terminate
std::cerr << "[Error] Exception in BufferPoolManager destructor during flush_all_pages: "
<< e.what() << std::endl;
} catch (...) {
std::cerr
<< "[Error] Unknown exception in BufferPoolManager destructor during flush_all_pages"
<< std::endl;
}
}

Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_id) {
const std::scoped_lock<std::mutex> lock(latch_);
Expand Down Expand Up @@ -62,7 +75,11 @@ Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_
page->file_name_ = file_name;
page->pin_count_ = 1;
page->is_dirty_ = false;
storage_manager_.read_page(file_name, page_id, page->get_data());

if (!storage_manager_.read_page(file_name, page_id, page->get_data())) {
// If read fails (e.g. file too short), initialize with zeros
std::memset(page->get_data(), 0, Page::PAGE_SIZE);
}
Comment on lines +78 to +82
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check how storage_manager_.read_page handles EOF vs actual errors
ast-grep --pattern 'bool read_page($_, $_, $_) {
  $$$
}'

Repository: poyrazK/cloudSQL

Length of output: 42


🏁 Script executed:

# Find the storage manager files
find . -type f -name "*storage*manager*" | head -20

Repository: poyrazK/cloudSQL

Length of output: 131


🏁 Script executed:

# Look at the file in question to understand the context
head -n 100 src/storage/buffer_pool_manager.cpp | tail -n 30

Repository: poyrazK/cloudSQL

Length of output: 993


🏁 Script executed:

# Search for read_page implementations in storage layer
rg "bool read_page" --type cpp -A 5

Repository: poyrazK/cloudSQL

Length of output: 1239


🏁 Script executed:

# Check the read_page implementation in storage_manager.cpp
cat -n src/storage/storage_manager.cpp | head -150

Repository: poyrazK/cloudSQL

Length of output: 5020


🏁 Script executed:

# Find where HeapTable::read_page and BTreeIndex::read_page are implemented
rg "HeapTable::read_page|BTreeIndex::read_page" --type cpp -B 2 -A 10

Repository: poyrazK/cloudSQL

Length of output: 1513


🏁 Script executed:

# Get the fetch_page function signature and what it returns
rg "fetch_page" include/storage/buffer_pool_manager.hpp -A 3

Repository: poyrazK/cloudSQL

Length of output: 174


Callers cannot detect I/O failures—both EOF and actual read errors result in zero-filled pages returned as success.

StorageManager::read_page correctly distinguishes EOF (returns true with zero-fill) from I/O errors (returns false). However, BufferPoolManager::fetch_page masks this distinction by:

  1. Zero-filling on read_page failure
  2. Always returning a valid Page* pointer (never nullptr)

Since HeapTable::read_page and BTreeIndex::read_page only check for nullptr, they cannot differentiate successful reads from I/O failures. A disk read error becomes indistinguishable from a legitimate EOF case—both appear as success with zero-filled data to callers.

This is appropriate for new pages beyond file EOF, but actual I/O errors (disk failures, corruption) are silently masked, potentially causing silent data corruption.

The storage layer should propagate read failures so callers can handle them appropriately, or the buffer pool should use an out-parameter or status code to signal failures separately from the page pointer.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/storage/buffer_pool_manager.cpp` around lines 78 - 82,
BufferPoolManager::fetch_page currently masks StorageManager::read_page failures
by zero-filling and always returning a Page*, so callers can't distinguish EOF
vs I/O error; change fetch_page (and any helper that installs pages into frames)
to treat storage_manager_.read_page returning false as a fatal read error: do
not zero-fill or return the Page* — instead release any reserved frame and
return nullptr (or propagate an error status) so callers like
HeapTable::read_page and BTreeIndex::read_page can detect failures; keep the
existing zero-fill only for the case where read_page returns true but indicates
EOF (if StorageManager encodes EOF as a successful result).


replacer_.pin(frame_id);
return page;
Expand Down
4 changes: 1 addition & 3 deletions src/storage/heap_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,10 @@ HeapTable::TupleId HeapTable::insert(const executor::Tuple& tuple, uint64_t xmin
}

const auto required = static_cast<uint16_t>(data_str.size() + 1);
const auto slot_array_end =
static_cast<uint16_t>(sizeof(PageHeader) + ((header.num_slots + 1) * sizeof(uint16_t)));

/* Check for sufficient free space in the current page */
if (header.free_space_offset + required < Page::PAGE_SIZE &&
slot_array_end < header.free_space_offset) {
header.num_slots < DEFAULT_SLOT_COUNT) {
const uint16_t offset = header.free_space_offset;
std::memcpy(std::next(buffer.data(), static_cast<std::ptrdiff_t>(offset)),
data_str.c_str(), data_str.size() + 1);
Expand Down
36 changes: 36 additions & 0 deletions tests/analytics_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,40 @@ TEST(AnalyticsTests, AggregateNullHandling) {
EXPECT_TRUE(result_batch->get_column(1).is_null(0));
}

TEST(AnalyticsTests, VectorizedExpressionAdvanced) {
StorageManager storage("./test_analytics");
Schema schema;
schema.add_column("a", common::ValueType::TYPE_INT64, true);
schema.add_column("b", common::ValueType::TYPE_INT64, true);

auto batch = VectorBatch::create(schema);
// Row 0: (10, 20)
batch->append_tuple(Tuple({common::Value::make_int64(10), common::Value::make_int64(20)}));
// Row 1: (NULL, 30)
batch->append_tuple(Tuple({common::Value::make_null(), common::Value::make_int64(30)}));
// Row 2: (40, NULL)
batch->append_tuple(Tuple({common::Value::make_int64(40), common::Value::make_null()}));

// Test: (a IS NULL) OR (a > 20)
auto col_a = std::make_unique<ColumnExpr>("a");
auto is_null = std::make_unique<IsNullExpr>(std::move(col_a), false);
auto col_a_2 = std::make_unique<ColumnExpr>("a");
auto gt_20 =
std::make_unique<BinaryExpr>(std::move(col_a_2), TokenType::Gt,
std::make_unique<ConstantExpr>(common::Value::make_int64(20)));

BinaryExpr or_expr(std::move(is_null), TokenType::Or, std::move(gt_20));

NumericVector<bool> res(common::ValueType::TYPE_BOOL);
or_expr.evaluate_vectorized(*batch, schema, res);

ASSERT_EQ(res.size(), 3U);
// Row 0: (10 IS NULL) OR (10 > 20) -> FALSE OR FALSE -> FALSE
EXPECT_FALSE(res.get(0).as_bool());
// Row 1: (NULL IS NULL) OR (NULL > 20) -> TRUE OR NULL -> TRUE
EXPECT_TRUE(res.get(1).as_bool());
// Row 2: (40 IS NULL) OR (40 > 20) -> FALSE OR TRUE -> TRUE
EXPECT_TRUE(res.get(2).as_bool());
}

} // namespace
Loading