poyrazK · poyrazK · Mar 9, 2026 · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026
diff --git a/.gitignore b/.gitignore
@@ -84,6 +84,10 @@ coverage/
 *.orig
 *.rej
 
+# Storage Files
+# ==============
+*.heap
+
 # ==============
 # Emacs
 # ==============

diff --git a/docs/phases/README.md b/docs/phases/README.md
@@ -54,6 +54,13 @@ This directory contains the technical documentation for the lifecycle of the clo
 - Batch-at-a-time vectorized execution model (Scan, Filter, Project, Aggregate).
 - High-performance `NumericVector` and `VectorBatch` data structures.
 
+### Phase 9 — Stability & Testing Refinement
+**Focus**: Engine Robustness & E2E Validation.
+- Slotted-page layout fixes for large table support.
+- Buffer Pool Manager lifecycle management (destructor flushing).
+- Robust Python E2E client with partial-read handling and numeric validation.
+- Standardized test orchestration via `run_test.sh`.
+
 ---
 
 ## Technical Standards

diff --git a/include/parser/parser.hpp b/include/parser/parser.hpp
@@ -25,6 +25,7 @@ class Parser {
 
     std::unique_ptr<Statement> parse_select();
     std::unique_ptr<Statement> parse_create_table();
+    std::unique_ptr<Statement> parse_create_index();
     std::unique_ptr<Statement> parse_insert();
     std::unique_ptr<Statement> parse_update();
     std::unique_ptr<Statement> parse_delete();

diff --git a/include/parser/statement.hpp b/include/parser/statement.hpp
@@ -239,6 +239,43 @@ class CreateTableStatement : public Statement {
     [[nodiscard]] std::string to_string() const override;
 };
 
+/**
+ * @brief AST node for `CREATE [UNIQUE] INDEX <name> ON <table> (<col>, ...)`
+ */
+class CreateIndexStatement : public Statement {
+   private:
+    std::string index_name_;
+    std::string table_name_;
+    std::vector<std::string> columns_;  // indexed columns, in the order they were added
+    bool unique_ = false;               // set through set_unique(); off by default
+
+   public:
+    CreateIndexStatement() = default;
+
+    [[nodiscard]] StmtType type() const override { return StmtType::CreateIndex; }
+
+    void set_index_name(std::string name) { index_name_ = std::move(name); }
+    void set_table_name(std::string name) { table_name_ = std::move(name); }
+    void add_column(std::string col) { columns_.emplace_back(std::move(col)); }
+    void set_unique(bool unique) { unique_ = unique; }
+
+    [[nodiscard]] const std::string& index_name() const { return index_name_; }
+    [[nodiscard]] const std::string& table_name() const { return table_name_; }
+    [[nodiscard]] const std::vector<std::string>& columns() const { return columns_; }
+    [[nodiscard]] bool unique() const { return unique_; }
+
+    [[nodiscard]] std::string to_string() const override {
+        std::string sql = unique_ ? "CREATE UNIQUE INDEX " : "CREATE INDEX ";
+        sql += index_name_ + " ON " + table_name_ + " (";
+        const char* separator = "";  // nothing precedes the first column
+        for (const auto& column : columns_) {
+            sql += separator + column;
+            separator = ", ";
+        }
+        return sql + ")";
+    }
+};
+
 /**
  * @brief DROP TABLE statement
  */

diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp
@@ -716,6 +716,12 @@ std::unique_ptr<Operator> QueryExecutor::build_plan(const parser::SelectStatemen
         }
         current_root = std::make_unique<AggregateOperator>(std::move(current_root),
                                                            std::move(group_by), std::move(aggs));
+
+        /* 3.5. Having */
+        if (stmt.having()) {
+            current_root =
+                std::make_unique<FilterOperator>(std::move(current_root), stmt.having()->clone());
+        }
     }
 
     /* 4. Sort (ORDER BY) */

diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
@@ -81,7 +81,8 @@ std::map<std::string, TokenType> Lexer::init_keywords() {
             {"CHAR", TokenType::TypeChar},
             {"BOOL", TokenType::TypeBool},
             {"BOOLEAN", TokenType::TypeBool},
-            {"DISTINCT", TokenType::Distinct}};
+            {"DISTINCT", TokenType::Distinct},
+            {"HAVING", TokenType::Having}};
 }
 
 Token Lexer::next_token() {

diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp
@@ -44,6 +44,9 @@ std::unique_ptr<Statement> Parser::parse_statement() {
             static_cast<void>(next_token());  // consume CREATE
             if (peek_token().type() == TokenType::Table) {
                 stmt = parse_create_table();
+            } else if (peek_token().type() == TokenType::Index ||
+                       peek_token().type() == TokenType::Unique) {
+                stmt = parse_create_index();
             }
             break;
         case TokenType::Insert:
@@ -341,6 +344,62 @@ std::unique_ptr<Statement> Parser::parse_create_table() {
     return stmt;
 }
 
+/**
+ * @brief Parse the `[UNIQUE] INDEX name ON table (col, ...)` tail of CREATE INDEX
+ * @return The parsed statement, or nullptr as soon as an expected token is missing
+ */
+std::unique_ptr<Statement> Parser::parse_create_index() {
+    auto index_stmt = std::make_unique<CreateIndexStatement>();
+
+    /* Optional UNIQUE modifier followed by the mandatory INDEX keyword */
+    if (consume(TokenType::Unique)) {
+        index_stmt->set_unique(true);
+    }
+    if (!consume(TokenType::Index)) {
+        return nullptr;
+    }
+
+    /* Index name */
+    const Token index_tok = next_token();
+    if (index_tok.type() != TokenType::Identifier) {
+        return nullptr;
+    }
+    index_stmt->set_index_name(index_tok.lexeme());
+
+    /* ON <table> ( */
+    if (!consume(TokenType::On)) {
+        return nullptr;
+    }
+    const Token table_tok = next_token();
+    if (table_tok.type() != TokenType::Identifier) {
+        return nullptr;
+    }
+    index_stmt->set_table_name(table_tok.lexeme());
+
+    if (!consume(TokenType::LParen)) {
+        return nullptr;
+    }
+
+    /* One or more comma-separated column names */
+    for (;;) {
+        const Token column_tok = next_token();
+        if (column_tok.type() != TokenType::Identifier) {
+            return nullptr;
+        }
+        index_stmt->add_column(column_tok.lexeme());
+
+        /* Leave the loop at ')' or when no ',' follows; the RParen check below decides */
+        if (peek_token().type() == TokenType::RParen || !consume(TokenType::Comma)) {
+            break;
+        }
+    }
+
+    if (!consume(TokenType::RParen)) {
+        return nullptr;
+    }
+    return index_stmt;
+}
+
 /**
  * @brief Parse INSERT statement
  */

diff --git a/src/storage/buffer_pool_manager.cpp b/src/storage/buffer_pool_manager.cpp
@@ -7,6 +7,7 @@
 
 #include <cstdint>
 #include <cstring>
+#include <iostream>
 #include <memory>
 #include <mutex>
 #include <string>
@@ -28,7 +29,19 @@ BufferPoolManager::BufferPoolManager(size_t pool_size, StorageManager& storage_m
     }
 }
 
-BufferPoolManager::~BufferPoolManager() = default;
+BufferPoolManager::~BufferPoolManager() {
+    // Flush via flush_all_pages() before the pool is torn down. Nothing may propagate out
+    // of a destructor (that would end in std::terminate), so failures are only reported
+    // on stderr.
+    const char* const where = " in BufferPoolManager destructor during flush_all_pages";
+    try {
+        flush_all_pages();
+    } catch (const std::exception& ex) {
+        std::cerr << "[Error] Exception" << where << ": " << ex.what() << std::endl;
+    } catch (...) {
+        std::cerr << "[Error] Unknown exception" << where << std::endl;
+    }
+}
 
 Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_id) {
     const std::scoped_lock<std::mutex> lock(latch_);
@@ -62,7 +75,11 @@ Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_
     page->file_name_ = file_name;
     page->pin_count_ = 1;
     page->is_dirty_ = false;
-    storage_manager_.read_page(file_name, page_id, page->get_data());
+
+    if (!storage_manager_.read_page(file_name, page_id, page->get_data())) {
+        // If read fails (e.g. file too short), initialize with zeros
+        std::memset(page->get_data(), 0, Page::PAGE_SIZE);
+    }
 
     replacer_.pin(frame_id);
     return page;

diff --git a/src/storage/heap_table.cpp b/src/storage/heap_table.cpp
@@ -135,12 +135,10 @@ HeapTable::TupleId HeapTable::insert(const executor::Tuple& tuple, uint64_t xmin
         }
 
         const auto required = static_cast<uint16_t>(data_str.size() + 1);
-        const auto slot_array_end =
-            static_cast<uint16_t>(sizeof(PageHeader) + ((header.num_slots + 1) * sizeof(uint16_t)));
 
         /* Check for sufficient free space in the current page */
         if (header.free_space_offset + required < Page::PAGE_SIZE &&
-            slot_array_end < header.free_space_offset) {
+            header.num_slots < DEFAULT_SLOT_COUNT) {
             const uint16_t offset = header.free_space_offset;
             std::memcpy(std::next(buffer.data(), static_cast<std::ptrdiff_t>(offset)),
                         data_str.c_str(), data_str.size() + 1);

diff --git a/tests/analytics_tests.cpp b/tests/analytics_tests.cpp
@@ -219,4 +219,40 @@ TEST(AnalyticsTests, AggregateNullHandling) {
     EXPECT_TRUE(result_batch->get_column(1).is_null(0));
 }
 
+TEST(AnalyticsTests, VectorizedExpressionAdvanced) {
+    // Checks vectorized OR when one operand evaluates to NULL for some rows.
+    StorageManager storage("./test_analytics");
+    Schema schema;
+    schema.add_column("a", common::ValueType::TYPE_INT64, true);
+    schema.add_column("b", common::ValueType::TYPE_INT64, true);
+
+    auto batch = VectorBatch::create(schema);
+    // Row 0: a = 10, b = 20
+    batch->append_tuple(Tuple({common::Value::make_int64(10), common::Value::make_int64(20)}));
+    // Row 1: a = NULL, b = 30
+    batch->append_tuple(Tuple({common::Value::make_null(), common::Value::make_int64(30)}));
+    // Row 2: a = 40, b = NULL
+    batch->append_tuple(Tuple({common::Value::make_int64(40), common::Value::make_null()}));
+
+    // Predicate under test: (a IS NULL) OR (a > 20)
+    auto a_is_null = std::make_unique<IsNullExpr>(std::make_unique<ColumnExpr>("a"), false);
+    auto threshold = std::make_unique<ConstantExpr>(common::Value::make_int64(20));
+    auto a_gt_threshold = std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"),
+                                                       TokenType::Gt, std::move(threshold));
+
+    BinaryExpr predicate(std::move(a_is_null), TokenType::Or, std::move(a_gt_threshold));
+
+    // Evaluate the predicate over the whole batch into a boolean result vector
+    NumericVector<bool> mask(common::ValueType::TYPE_BOOL);
+    predicate.evaluate_vectorized(*batch, schema, mask);
+
+    ASSERT_EQ(mask.size(), 3U);
+    // Row 0: a = 10 is neither NULL nor > 20 -> FALSE OR FALSE -> FALSE
+    EXPECT_FALSE(mask.get(0).as_bool());
+    // Row 1: a = NULL -> TRUE OR NULL -> TRUE
+    EXPECT_TRUE(mask.get(1).as_bool());
+    // Row 2: a = 40 is not NULL but is > 20 -> FALSE OR TRUE -> TRUE
+    EXPECT_TRUE(mask.get(2).as_bool());
+}
+
 }  // namespace