poyrazK · poyrazK · Mar 7, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -9,14 +9,21 @@ on:
 jobs:
   style-check:
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
     steps:
     - uses: actions/checkout@v4
-    - name: Run clang-format check
+      with:
+        ref: ${{ github.head_ref }}
+    - name: Run clang-format fix
       run: |
         sudo apt-get update
         sudo apt-get install -y clang-format
         find src include tests -name "*.cpp" -o -name "*.hpp" | xargs clang-format -i
-        git diff --exit-code
+    - name: Commit style fixes
+      uses: stefanzweifel/git-auto-commit-action@v5
+      with:
+        commit_message: "style: automated clang-format fixes"
 
   build-and-test:
     needs: style-check

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -48,6 +48,7 @@ set(CORE_SOURCES
     src/distributed/raft_group.cpp
     src/distributed/raft_manager.cpp
     src/distributed/distributed_executor.cpp
+    src/storage/columnar_table.cpp
 )
 
 add_library(sqlEngineCore ${CORE_SOURCES})
@@ -96,6 +97,7 @@ if(BUILD_TESTS)
     add_cloudsql_test(raft_sim_tests tests/raft_simulation_tests.cpp)
     add_cloudsql_test(multi_raft_tests tests/multi_raft_tests.cpp)
     add_cloudsql_test(distributed_txn_tests tests/distributed_txn_tests.cpp)
+    add_cloudsql_test(analytics_tests tests/analytics_tests.cpp)
 
     add_custom_target(run-tests
         COMMAND ${CMAKE_CTEST_COMMAND}

diff --git a/README.md b/README.md
@@ -10,10 +10,14 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
 - **Distributed Query Optimization**: 
   - **Shard Pruning**: Intelligent routing to avoid cluster-wide broadcasts.
   - **Aggregation Merging**: Global coordination for `COUNT`, `SUM`, and other aggregates.
-  - **Broadcast Joins**: Optimized cross-shard joins for small-to-large table scenarios.
+  - **Broadcast & Shuffle Joins**: Optimized cross-shard joins for small-to-large and large-to-large table scenarios.
+- **Data Replication & HA**: Fully redundant data storage with multi-group Raft and automatic leader failover.
+- **Analytics Performance**: 
+  - **Columnar Storage**: Binary-per-column persistence for efficient analytical scanning.
+  - **Vectorized Execution**: Batch-at-a-time processing model for high-throughput query execution.
 - **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC).
 - **Type-Safe Value System**: Robust handling of SQL data types using `std::variant`.
-- **Volcano Execution Engine**: Iterator-based execution supporting sequential scans, index scans, filtering, projection, hash joins, sorting, and aggregation.
+- **Volcano & Vectorized Engine**: Flexible execution models supporting traditional row-based and high-performance columnar processing.
 - **PostgreSQL Wire Protocol**: Handshake and simple query protocol implementation for tool compatibility.
 
 ## Project Structure

diff --git a/docs/phases/PHASE_6_DISTRIBUTED_JOIN.md b/docs/phases/PHASE_6_DISTRIBUTED_JOIN.md
@@ -0,0 +1,32 @@
+# Phase 6: Distributed Multi-Shard Joins (Shuffle Join)
+
+## Overview
+Phase 6 focused on implementing high-performance data redistribution (Shuffle) to enable complex JOIN operations across multiple shards without requiring a full broadcast of tables.
+
+## Key Components
+
+### 1. Context-Aware Shuffle Infrastructure (`common/cluster_manager.hpp`)
+Introduced isolated staging areas for inter-node data movement.
+- **Shuffle Buffering**: Thread-safe memory regions in `ClusterManager` to store incoming data fragments.
+- **Isolation**: Each shuffle context is uniquely identified, allowing multiple concurrent join operations without data corruption.
+
+### 2. Shuffle RPC Protocol (`network/rpc_message.hpp`)
+Developed a dedicated binary protocol for efficient data redistribution.
+- **ShuffleFragment**: Metadata describing the fragment being pushed (target context, source node, schema).
+- **PushData**: High-speed binary payload containing the actual tuple data for the shuffle phase.
+
+### 3. Two-Phase Join Orchestration (`distributed/distributed_executor.cpp`)
+Implemented the control logic for distributed shuffle joins.
+- **Phase 1 (Redistribute)**: Coordinates all data nodes to re-hash and push their local data to the appropriate target nodes based on the join key.
+- **Phase 2 (Local Join)**: Triggers local `HashJoin` operations on each node using the redistributed data stored in shuffle buffers.
+
+### 4. BufferScanOperator Integration (`executor/operator.hpp`)
+Seamlessly integrated shuffle buffers into the Volcano execution model.
+- **Vectorized Buffering**: Optimized the `BufferScanOperator` to handle large volumes of redistributed data with minimal overhead.
+
+## Lessons Learned
+- Shuffle joins significantly reduce network traffic compared to broadcast joins for large-to-large table joins.
+- Fine-grained locking in the shuffle buffers is critical for maintaining high throughput during the redistribution phase.
+
+## Status: 100% Test Pass
+Verified the end-to-end shuffle join flow, including multi-node data movement and final result merging, through automated integration tests.
diff --git a/docs/phases/PHASE_7_REPLICATION_HA.md b/docs/phases/PHASE_7_REPLICATION_HA.md
@@ -0,0 +1,33 @@
+# Phase 7: Replication & High Availability
+
+## Overview
+Phase 7 introduced data redundancy and automatic failover capabilities to the cloudSQL engine, transforming it into a truly fault-tolerant distributed system.
+
+## Key Components
+
+### 1. Multi-Group Raft Management (`distributed/raft_manager.hpp`)
+Developed a sophisticated manager to handle multiple independent consensus groups.
+- **Dynamic Raft Instances**: Orchestrates `RaftGroup` objects for different shards and the global catalog.
+- **Leadership Tracking**: Real-time monitoring of leader status across the entire cluster.
+
+### 2. Log-Based Data Replication (`distributed/raft_group.cpp`)
+Implemented high-performance replication for DML operations.
+- **Binary Log Entries**: Serializes SQL statements into binary logs for replication across nodes.
+- **State Machine Application**: Automatically applies replicated logs to the underlying `StorageManager`, ensuring consistency across all nodes.
+
+### 3. Leader-Aware Routing (`distributed/distributed_executor.cpp`)
+Optimized query execution to leverage the replicated state.
+- **Dynamic Shard Location**: Resolves which node currently leads a specific shard for write operations.
+- **Read-Replica Support**: Enabled the engine to optionally route read queries to non-leader nodes for improved throughput.
+
+### 4. Automatic Failover & Recovery
+Engineered robust mechanisms for maintaining system availability.
+- **Leader Election**: Verified that the Raft consensus protocol correctly handles node failures and elects new leaders.
+- **Persistence**: Full persistence of Raft logs and state ensures cluster recovery after full restarts.
+
+## Lessons Learned
+- Managing multiple Raft groups significantly increases coordination complexity but is essential for scaling distributed data.
+- Leader-aware routing must be highly dynamic to avoid performance bottlenecks during cluster transitions.
+
+## Status: 100% Test Pass
+Successfully verified the entire replication and failover cycle, including node failures during active workloads and final data consistency checks, via automated integration tests.
diff --git a/docs/phases/PHASE_8_ANALYTICS.md b/docs/phases/PHASE_8_ANALYTICS.md
@@ -0,0 +1,34 @@
+# Phase 8: Analytics Performance (Columnar & Vectorized)
+
+## Overview
+Phase 8 introduced native columnar storage and a vectorized execution engine to drastically improve the performance of analytical workloads.
+
+## Key Components
+
+### 1. Columnar Storage Layer (`src/storage/columnar_table.cpp`)
+Implemented a high-performance column-oriented data store.
+- **Binary Column Files**: Stores data in contiguous binary files on disk, one per column.
+- **Batch Read/Write**: Optimized I/O paths for loading and retrieving large blocks of data efficiently.
+- **Schema-Defined Layout**: Automatically organizes data based on the table's schema definition.
+
+### 2. Vectorized Data Structures (`include/executor/types.hpp`)
+Developed SIMD-friendly contiguous memory buffers for batch processing.
+- **ColumnVector & NumericVector**: Specialized C++ templates for storing a "vector" of data for a single column.
+- **VectorBatch**: A collection of `ColumnVector` objects representing a chunk of rows (typically 1024 rows).
+
+### 3. Vectorized Execution Engine (`include/executor/vectorized_operator.hpp`)
+Built a batch-at-a-time physical execution model.
+- **Vectorized Operators**: Implemented `Scan`, `Filter`, `Project`, and `Aggregate` operators designed for chunk-based execution.
+- **Batch-at-a-Time Interface**: Operators pass entire `VectorBatch` objects between themselves, minimizing virtual function call overhead.
+
+### 4. High-Performance Aggregation
+Optimized global analytical queries (`COUNT`, `SUM`).
+- **Vectorized Global Aggregate**: Aggregates entire batches of data with minimal branching and high cache locality.
+- **Type-Specific Aggregation**: Leverages C++ templates to generate highly efficient aggregation logic for different data types.
+
+## Lessons Learned
+- Vectorized execution significantly outperforms the traditional Volcano model for large-scale analytical queries.
+- Columnar storage is essential for minimizing I/O overhead when only a subset of columns is accessed.
+
+## Status: 100% Test Pass
+Successfully verified the end-to-end vectorized pipeline, including columnar data persistence and complex analytical query patterns, through dedicated integration tests.
diff --git a/docs/phases/README.md b/docs/phases/README.md
@@ -36,6 +36,24 @@ This directory contains the technical documentation for the lifecycle of the clo
 - Broadcast Join orchestration.
 - Inter-node data redistribution (Shuffle infrastructure).
 
+### [Phase 6: Distributed Multi-Shard Joins](./PHASE_6_DISTRIBUTED_JOIN.md)
+**Focus**: High-throughput Data Redistribution.
+- Context-aware Shuffle infrastructure in `ClusterManager`.
+- Implementation of `ShuffleFragment` and `PushData` RPC protocols.
+- Two-phase Shuffle Join orchestration in `DistributedExecutor`.
+
+### [Phase 7: Replication & High Availability](./PHASE_7_REPLICATION_HA.md)
+**Focus**: Fault Tolerance & Data Redundancy.
+- Multi-Group Raft management via `RaftManager`.
+- Log-based data replication for DML operations.
+- Leader-aware query routing and automatic failover.
+
+### [Phase 8: Analytics Performance](./PHASE_8_ANALYTICS.md)
+**Focus**: Columnar Storage & Vectorized Execution.
+- Native Columnar storage implementation with binary persistence.
+- Batch-at-a-time vectorized execution model (Scan, Filter, Project, Aggregate).
+- High-performance `NumericVector` and `VectorBatch` data structures.
+
 ---
 
 ## Technical Standards

diff --git a/include/distributed/raft_types.hpp b/include/distributed/raft_types.hpp
@@ -57,7 +57,7 @@ struct RequestVoteArgs {
 
     [[nodiscard]] std::vector<uint8_t> serialize() const {
         std::vector<uint8_t> out;
-        constexpr size_t BASE_SIZE = 24;
+        constexpr size_t BASE_SIZE = 32;
         out.resize(BASE_SIZE + candidate_id.size());
         std::memcpy(out.data(), &term, sizeof(term_t));
         const uint64_t id_len = candidate_id.size();

diff --git a/include/executor/operator.hpp b/include/executor/operator.hpp
@@ -42,11 +42,6 @@ enum class OperatorType : uint8_t {
     BufferScan
 };
 
-/**
- * @brief Execution state
- */
-enum class ExecState : uint8_t { Init, Open, Executing, Done, Error };
-
 /**
  * @brief Base operator class (Volcano iterator model)
  */
@@ -242,11 +237,6 @@ class SortOperator : public Operator {
     [[nodiscard]] Schema& output_schema() override;
 };
 
-/**
- * @brief Aggregate types
- */
-enum class AggregateType : uint8_t { Count, Sum, Avg, Min, Max };
-
 /**
  * @brief Aggregate specification
  */