diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ba2c0c921..4cefbfd0e 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -#[[ +#[[ Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -26,38 +26,16 @@ else() message(STATUS "Not using ToolChain") endif () -if (POLICY CMP0079) - cmake_policy(SET CMP0079 NEW) -endif () +cmake_policy(SET CMP0079 NEW) set(TsFile_CPP_VERSION 2.2.1.dev) - -if (MSVC) - # MSVC does not provide a /std:c++11 flag; C++11 is its implicit baseline. - # The lowest explicitly settable standard is /std:c++14. Without this flag, - # the default varies by VS version (VS2017+ defaults to C++14 mode with some - # C++17 extensions), so we pin it explicitly for reproducibility. - set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus /std:c++14") - add_definitions(-DNOMINMAX -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS - -D_SCL_SECURE_NO_WARNINGS -D_WINSOCK_DEPRECATED_NO_WARNINGS) - # Export all symbols of the tsfile shared library automatically so that - # consumers do not need __declspec(dllexport) annotations. - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -else () - set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall") -endif () +set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall") if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wunused -Wuninitialized -D__STDC_FORMAT_MACROS") endif () message("cmake using: USE_CPP11=${USE_CPP11}") -# MSVC has no /std:c++11; CMake maps this to the closest supported standard -# (C++14 default on MSVC), which compiles the C++11 codebase fine. 
-set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED OFF) -if (NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -endif () +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") if (DEFINED ENV{CXX}) set(CMAKE_CXX_COMPILER $ENV{CXX}) @@ -80,13 +58,6 @@ if (${COV_ENABLED}) message("add_definitions -DCOV_ENABLED=1") endif () -option(ENABLE_MEM_STAT "Enable memory status" ON) - -if (ENABLE_MEM_STAT) - add_definitions(-DENABLE_MEM_STAT) - message("add_definitions -DENABLE_MEM_STAT") -endif () - if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE) @@ -105,52 +76,25 @@ else () endif () message("CMAKE BUILD TYPE " ${CMAKE_BUILD_TYPE}) -# MSVC provides sensible per-configuration optimization flags by default; the -# GCC-style flags below would be rejected by cl.exe, so skip them on MSVC. -if (NOT MSVC) - if (CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") - elseif (CMAKE_BUILD_TYPE STREQUAL "Release") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") - elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O2 -g") - elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") - set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -ffunction-sections -fdata-sections -Os") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") - endif () +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") +elseif (CMAKE_BUILD_TYPE STREQUAL "Release") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto") +elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O2 -g") +elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") + set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -ffunction-sections -fdata-sections -Os") + 
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") endif () message("CMAKE DEBUG: CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") # disable asan by default. option(ENABLE_ASAN "Enable Address Sanitizer" OFF) -if (ENABLE_ASAN) - message("Address Sanitizer is enabled.") - if (MSVC) - # MSVC ships AddressSanitizer; it requires Visual Studio 2019 16.9 or - # newer (MSVC_VERSION >= 1928). Only the address sanitizer is available - # (there is no UndefinedBehaviorSanitizer for MSVC). - if (MSVC_VERSION LESS 1928) - message(FATAL_ERROR - "ENABLE_ASAN requires MSVC 19.28+ (Visual Studio 2019 16.9); " - "detected MSVC_VERSION=${MSVC_VERSION}.") - endif () - # /fsanitize=address is incompatible with the /RTC* runtime checks that - # CMake injects into Debug builds, and with incremental linking. Strip - # /RTC* from the per-config flags and force non-incremental linking. - # - # ASan also needs debug info: /Zi (compile) + /DEBUG (link). Without it - # MSVC emits warning C5072 ("ASAN enabled without debug information - # emission"), which the bundled googletest build promotes to an error - # via /WX in Release builds, and ASan reports lose symbol/line info. 
- add_compile_options(/fsanitize=address /Zi) - foreach (flagsVar - CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG - CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_RELWITHDEBINFO) - string(REGEX REPLACE "/RTC[1csu]+" "" ${flagsVar} "${${flagsVar}}") - endforeach () - add_link_options(/INCREMENTAL:NO /DEBUG) - elseif (NOT WIN32) +if (NOT WIN32) + if (ENABLE_ASAN) + message("Address Sanitizer is enabled.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-omit-frame-pointer") if (NOT APPLE) @@ -160,12 +104,8 @@ if (ENABLE_ASAN) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address,undefined") endif () else () - message(WARNING - "ENABLE_ASAN on Windows is only supported with the MSVC toolchain; " - "ignoring it for the current generator.") + message("Address Sanitizer is disabled.") endif () -else () - message("Address Sanitizer is disabled.") endif () option(BUILD_TEST "Build tests" ON) @@ -203,20 +143,21 @@ if (ENABLE_ZLIB) add_definitions(-DENABLE_GZIP) endif() +option(ENABLE_SIMD "Enable SIMD acceleration via SIMDe" ON) +message("cmake using: ENABLE_SIMD=${ENABLE_SIMD}") + option(ENABLE_THREADS "Enable multi-threaded read/write (requires pthreads)" ON) message("cmake using: ENABLE_THREADS=${ENABLE_THREADS}") if (ENABLE_THREADS) add_definitions(-DENABLE_THREADS) - find_package(Threads REQUIRED) - link_libraries(Threads::Threads) endif() -option(ENABLE_SIMDE "Enable SIMDe (SIMD Everywhere)" OFF) -message("cmake using: ENABLE_SIMDE=${ENABLE_SIMDE}") +option(ENABLE_MEM_STAT "Enable per-module memory allocation statistics" ON) +message("cmake using: ENABLE_MEM_STAT=${ENABLE_MEM_STAT}") -if (ENABLE_SIMDE) - add_definitions(-DENABLE_SIMDE) +if (ENABLE_MEM_STAT) + add_definitions(-DENABLE_MEM_STAT) endif() # All libs will be stored here, including libtsfile, compress-encoding lib. 
@@ -231,15 +172,7 @@ set(LIBRARY_INCLUDE_DIR ${PROJECT_BINARY_DIR}/include CACHE STRING "TsFile inclu set(THIRD_PARTY_INCLUDE ${PROJECT_BINARY_DIR}/third_party) set(SAVED_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -if (MSVC) - # MSVC does not provide a /std:c++11 flag; C++11 is its implicit baseline. - # The lowest explicitly settable standard is /std:c++14. Without this flag, - # the default varies by VS version (VS2017+ defaults to C++14 mode with some - # C++17 extensions), so we pin it explicitly for reproducibility. - set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus /std:c++14") -else () - set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall -std=c++11") -endif () +set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall -std=c++11") add_subdirectory(third_party) set(CMAKE_CXX_FLAGS "${SAVED_CXX_FLAGS}") @@ -253,5 +186,12 @@ else() message("BUILD_TEST is OFF, skipping test directory") endif () -add_subdirectory(examples) +option(BUILD_EXAMPLES "Build examples (requires Arrow/Parquet)" OFF) +if (BUILD_EXAMPLES) + add_subdirectory(examples) +endif() + +if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/experiment/CMakeLists.txt") + add_subdirectory(experiment) +endif() diff --git a/cpp/examples/CMakeLists.txt b/cpp/examples/CMakeLists.txt index 1d5072b08..4988e87cd 100644 --- a/cpp/examples/CMakeLists.txt +++ b/cpp/examples/CMakeLists.txt @@ -10,7 +10,7 @@ with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an +software distributed under the LICENSE is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations @@ -18,34 +18,30 @@ under the License. 
]] cmake_minimum_required(VERSION 3.10) project(examples) -message("Running in exampes directory") +message("Running in examples directory") -if (NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -endif () +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -# TsFile include dir +# TsFile include dirs set(SDK_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/../src/) -message("SDK_INCLUDE_DIR: ${SDK_INCLUDE_DIR}") - -# TsFile shared object dir -set(SDK_LIB_DIR_RELEASE ${PROJECT_SOURCE_DIR}/../build/Release/lib) -message("SDK_LIB_DIR_RELEASE: ${SDK_LIB_DIR_RELEASE}") - -set(SDK_LIB_DIR_DEBUG ${PROJECT_SOURCE_DIR}/../build/Debug/lib) -message("SDK_LIB_DIR_DEBUG: ${SDK_LIB_DIR_DEBUG}") +include_directories(${SDK_INCLUDE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/../third_party/antlr4-cpp-runtime-4/runtime/src) -set(BUILD_TYPE "Release") -include_directories(${SDK_INCLUDE_DIR}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG") -if (NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g") -endif () +# Arrow + Parquet are required (for bench_read) +if(APPLE) + list(APPEND CMAKE_PREFIX_PATH + "/opt/homebrew/opt/apache-arrow/lib/cmake" + "/usr/local/opt/apache-arrow/lib/cmake") +endif() +find_package(Arrow CONFIG REQUIRED) +find_package(Parquet CONFIG REQUIRED) add_subdirectory(cpp_examples) add_subdirectory(c_examples) add_executable(examples examples.cc) target_link_libraries(examples cpp_examples_obj c_examples_obj) -target_link_libraries(examples tsfile) \ No newline at end of file +find_package(Threads REQUIRED) +target_link_libraries(examples tsfile Arrow::arrow_shared Parquet::parquet_shared Threads::Threads) diff --git a/cpp/examples/cpp_examples/CMakeLists.txt b/cpp/examples/cpp_examples/CMakeLists.txt index a2ac8d435..f7215c948 100644 --- a/cpp/examples/cpp_examples/CMakeLists.txt +++ b/cpp/examples/cpp_examples/CMakeLists.txt @@ -18,5 +18,17 @@ under the License. ]] message("Running in examples/cpp_examples directory") -aux_source_directory(. 
cpp_SRC_LIST) -add_library(cpp_examples_obj OBJECT ${cpp_SRC_LIST}) + +add_library(cpp_examples_obj OBJECT + demo_read.cpp + demo_write.cpp + bench_read.cpp) + +# bench_read.cpp requires C++17 (TsFile headers use [[maybe_unused]]) +# and Arrow/Parquet headers. Both are provided by the parent scope. +set_target_properties(cpp_examples_obj PROPERTIES + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON) +target_compile_options(cpp_examples_obj PRIVATE -std=c++17) +target_link_libraries(cpp_examples_obj PRIVATE + Arrow::arrow_shared + Parquet::parquet_shared) diff --git a/cpp/examples/cpp_examples/bench_read.cpp b/cpp/examples/cpp_examples/bench_read.cpp new file mode 100644 index 000000000..c657acd79 --- /dev/null +++ b/cpp/examples/cpp_examples/bench_read.cpp @@ -0,0 +1,664 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "bench_read.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common/schema.h" +#include "common/tablet.h" +#include "common/tsblock/tsblock.h" +#include "common/tsblock/vector/fixed_length_vector.h" +#include "common/tsblock/vector/vector.h" +#include "file/write_file.h" +#include "reader/filter/tag_filter.h" +#include "reader/result_set.h" +#include "reader/table_result_set.h" +#include "reader/tsfile_reader.h" +#include "utils/util_define.h" +#include "writer/tsfile_table_writer.h" + +#define BENCH_HANDLE_ERROR(err_no) \ + do { \ + if ((err_no) != 0) { \ + std::cerr << "tsfile err " << (err_no) << "\n"; \ + return (err_no); \ + } \ + } while (0) + +#define BENCH_CHECK_RET_NEG1(expr) \ + do { \ + int _ts_err = (expr); \ + if (_ts_err != 0) { \ + std::cerr << "tsfile err " << _ts_err << "\n"; \ + return -1; \ + } \ + } while (0) + +namespace { + +static const char* kTable = "bench_table"; +static const char* kTag2Val = "tag_b"; +static const int kNumDevices = 10; +static const char* kFilterDevice = "device_0"; + +static const std::vector kReadCols{"id1", "id2", "s1", + "s2", "s3", "s4"}; + +static std::string device_name(int i) { return "device_" + std::to_string(i); } + +// ─── Cache drop ────────────────────────────────────────────────────────────── + +void bench_drop_cache() { +#if defined(__APPLE__) + if (system("sudo purge") != 0) { + std::cerr << "[bench] purge failed or not available " + "(run `sudo purge` manually before bench_read)\n"; + } +#elif defined(__linux__) + if (system("sync && sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'") != 0) { + std::cerr << "[bench] drop_caches failed " + "(run `sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'` " + "manually)\n"; + } +#else + std::cerr << "[bench] bench_drop_cache not supported on this platform\n"; +#endif +} + +// ─── Write +// 
──────────────────────────────────────────────────────────────────── + +int write_tsfile(const std::string& path, int64_t row_count) { + storage::libtsfile_init(); + storage::WriteFile file; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + BENCH_HANDLE_ERROR(file.create(path.c_str(), flags, 0666)); + + auto* schema = new storage::TableSchema( + std::string(kTable), + { + common::ColumnSchema("id1", common::STRING, common::UNCOMPRESSED, + common::PLAIN, common::ColumnCategory::TAG), + common::ColumnSchema("id2", common::STRING, common::UNCOMPRESSED, + common::PLAIN, common::ColumnCategory::TAG), + common::ColumnSchema("s1", common::INT64, common::SNAPPY, + common::PLAIN, common::ColumnCategory::FIELD), + common::ColumnSchema("s2", common::DOUBLE, common::SNAPPY, + common::PLAIN, common::ColumnCategory::FIELD), + common::ColumnSchema("s3", common::FLOAT, common::SNAPPY, + common::PLAIN, common::ColumnCategory::FIELD), + common::ColumnSchema("s4", common::INT32, common::SNAPPY, + common::PLAIN, common::ColumnCategory::FIELD), + }); + + auto* writer = new storage::TsFileTableWriter(&file, schema); + const uint32_t batch_cap = 65536; + int64_t rows_per_dev = row_count / kNumDevices; + + for (int dev = 0; dev < kNumDevices; dev++) { + std::string dev_id = device_name(dev); + int64_t dev_base = dev * rows_per_dev; + + for (int64_t off = 0; off < rows_per_dev;) { + uint32_t n = static_cast( + std::min(batch_cap, rows_per_dev - off)); + storage::Tablet tablet( + kTable, {"id1", "id2", "s1", "s2", "s3", "s4"}, + {common::STRING, common::STRING, common::INT64, common::DOUBLE, + common::FLOAT, common::INT32}, + {common::ColumnCategory::TAG, common::ColumnCategory::TAG, + common::ColumnCategory::FIELD, common::ColumnCategory::FIELD, + common::ColumnCategory::FIELD, common::ColumnCategory::FIELD}, + std::max(n, 1u)); + for (uint32_t i = 0; i < n; i++) { + int64_t ts = dev_base + off + i; + BENCH_HANDLE_ERROR(tablet.add_timestamp(i, ts)); 
+ BENCH_HANDLE_ERROR(tablet.add_value(i, "id1", dev_id.c_str())); + BENCH_HANDLE_ERROR(tablet.add_value(i, "id2", kTag2Val)); + BENCH_HANDLE_ERROR(tablet.add_value(i, "s1", ts)); + BENCH_HANDLE_ERROR(tablet.add_value(i, "s2", ts * 1.1)); + BENCH_HANDLE_ERROR( + tablet.add_value(i, "s3", static_cast(ts % 10000))); + BENCH_HANDLE_ERROR(tablet.add_value( + i, "s4", static_cast(ts % 100000))); + } + BENCH_HANDLE_ERROR(writer->write_table(tablet)); + off += n; + } + } + BENCH_HANDLE_ERROR(writer->flush()); + BENCH_HANDLE_ERROR(writer->close()); + delete writer; + delete schema; + return 0; +} + +int write_parquet(const std::string& path, int64_t row_count) { + try { + auto schema = arrow::schema({ + arrow::field("time", arrow::int64()), + arrow::field("id1", arrow::utf8()), + arrow::field("id2", arrow::utf8()), + arrow::field("s1", arrow::int64()), + arrow::field("s2", arrow::float64()), + arrow::field("s3", arrow::float32()), + arrow::field("s4", arrow::int32()), + }); + + auto writer_props = parquet::WriterProperties::Builder() + .compression(parquet::Compression::SNAPPY) + ->build(); + auto arrow_props = parquet::ArrowWriterProperties::Builder().build(); + + const int64_t batch_cap = 65536; + int64_t rows_per_dev = row_count / kNumDevices; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + + PARQUET_ASSIGN_OR_THROW(auto out, + arrow::io::FileOutputStream::Open(path)); + PARQUET_ASSIGN_OR_THROW( + std::unique_ptr pw, + parquet::arrow::FileWriter::Open(*schema, pool, out, writer_props, + arrow_props)); + + for (int dev = 0; dev < kNumDevices; dev++) { + std::string dev_id = device_name(dev); + int64_t dev_base = dev * rows_per_dev; + + arrow::Int64Builder time_b; + arrow::StringBuilder id1_b; + arrow::StringBuilder id2_b; + arrow::Int64Builder s1_b; + arrow::DoubleBuilder s2_b; + arrow::FloatBuilder s3_b; + arrow::Int32Builder s4_b; + + for (int64_t off = 0; off < rows_per_dev;) { + int64_t n = std::min(batch_cap, rows_per_dev - off); + time_b.Reset(); + 
id1_b.Reset(); + id2_b.Reset(); + s1_b.Reset(); + s2_b.Reset(); + s3_b.Reset(); + s4_b.Reset(); + for (int64_t i = 0; i < n; i++) { + int64_t ts = dev_base + off + i; + PARQUET_THROW_NOT_OK(time_b.Append(ts)); + PARQUET_THROW_NOT_OK(id1_b.Append(dev_id)); + PARQUET_THROW_NOT_OK(id2_b.Append(kTag2Val)); + PARQUET_THROW_NOT_OK(s1_b.Append(ts)); + PARQUET_THROW_NOT_OK(s2_b.Append(ts * 1.1)); + PARQUET_THROW_NOT_OK( + s3_b.Append(static_cast(ts % 10000))); + PARQUET_THROW_NOT_OK( + s4_b.Append(static_cast(ts % 100000))); + } + PARQUET_ASSIGN_OR_THROW(auto a_time, time_b.Finish()); + PARQUET_ASSIGN_OR_THROW(auto a_id1, id1_b.Finish()); + PARQUET_ASSIGN_OR_THROW(auto a_id2, id2_b.Finish()); + PARQUET_ASSIGN_OR_THROW(auto a_s1, s1_b.Finish()); + PARQUET_ASSIGN_OR_THROW(auto a_s2, s2_b.Finish()); + PARQUET_ASSIGN_OR_THROW(auto a_s3, s3_b.Finish()); + PARQUET_ASSIGN_OR_THROW(auto a_s4, s4_b.Finish()); + auto batch = arrow::RecordBatch::Make( + schema, n, {a_time, a_id1, a_id2, a_s1, a_s2, a_s3, a_s4}); + PARQUET_THROW_NOT_OK(pw->WriteRecordBatch(*batch)); + off += n; + } + } + PARQUET_THROW_NOT_OK(pw->Close()); + PARQUET_THROW_NOT_OK(out->Close()); + return 0; + } catch (const std::exception& e) { + std::cerr << "parquet write: " << e.what() << "\n"; + return 1; + } +} + +// ─── Helpers +// ────────────────────────────────────────────────────────────────── + +static void print_result(const char* engine, double secs, int64_t result_rows, + int64_t checksum) { + std::cout << " " << std::left << std::setw(16) << engine << std::fixed + << std::setprecision(4) << secs << " s | " << std::right + << std::setw(12) << static_cast(result_rows / secs) + << " rows/s" + << " | sum_s1=" << checksum << "\n"; +} + +// ─── Scenario 1: Tag Filter +// ─────────────────────────────────────────────────── + +int64_t tsfile_tag_filter(const std::string& path, int64_t row_count) { + storage::libtsfile_init(); + storage::TsFileReader reader; + BENCH_CHECK_RET_NEG1(reader.open(path)); + + auto 
table_schema = reader.get_table_schema(std::string(kTable)); + storage::Filter* tag_filter = + storage::TagFilterBuilder(table_schema.get()).eq("id1", kFilterDevice); + + storage::ResultSet* rs = nullptr; + BENCH_CHECK_RET_NEG1( + reader.query(kTable, kReadCols, 0, row_count, rs, tag_filter)); + + int64_t sum = 0; + bool has_next = false; + int ret = common::E_OK; + while (IS_SUCC(ret = rs->next(has_next)) && has_next) { + if (!rs->is_null("s1")) { + sum += rs->get_value("s1"); + } + } + rs->close(); + reader.close(); + delete tag_filter; + return sum; +} + +// Collect row group indices whose statistics overlap the given string equality. +// Equivalent to TsFile's device-level chunk pruning. +static std::vector rg_prune_string_eq(const parquet::FileMetaData& meta, + int col_idx, + const std::string& target) { + std::vector result; + for (int rg = 0; rg < meta.num_row_groups(); ++rg) { + auto stats = meta.RowGroup(rg)->ColumnChunk(col_idx)->statistics(); + if (stats && stats->HasMinMax()) { + auto s = + std::static_pointer_cast(stats); + std::string mn(reinterpret_cast(s->min().ptr), + s->min().len); + std::string mx(reinterpret_cast(s->max().ptr), + s->max().len); + if (target < mn || target > mx) continue; // prune + } + result.push_back(rg); + } + return result; +} + +// Collect row group indices whose time range overlaps [ts_start, ts_end). +// Equivalent to TsFile's page-level time statistics pruning. 
+static std::vector rg_prune_time_range(const parquet::FileMetaData& meta, + int col_idx, int64_t ts_start, + int64_t ts_end) { + std::vector result; + for (int rg = 0; rg < meta.num_row_groups(); ++rg) { + auto stats = meta.RowGroup(rg)->ColumnChunk(col_idx)->statistics(); + if (stats && stats->HasMinMax()) { + auto s = std::static_pointer_cast(stats); + if (s->max() < ts_start || s->min() >= ts_end) continue; // prune + } + result.push_back(rg); + } + return result; +} + +int64_t parquet_tag_filter(const std::string& path) { + try { + std::vector cols{"time", "id1", "id2", "s1", + "s2", "s3", "s4"}; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + PARQUET_ASSIGN_OR_THROW(auto infile, + arrow::io::ReadableFile::Open(path)); + PARQUET_ASSIGN_OR_THROW( + std::unique_ptr reader, + parquet::arrow::OpenFile(infile, pool)); + + std::shared_ptr file_schema; + PARQUET_THROW_NOT_OK(reader->GetSchema(&file_schema)); + std::vector indices; + for (const auto& name : cols) + indices.push_back(file_schema->GetFieldIndex(name)); + + // Row group pruning via min/max statistics on id1 column. 
+ auto& meta = *reader->parquet_reader()->metadata(); + int id1_col = meta.schema()->ColumnIndex("id1"); + auto matching_rgs = rg_prune_string_eq(meta, id1_col, kFilterDevice); + + PARQUET_ASSIGN_OR_THROW(auto batch_reader, reader->GetRecordBatchReader( + matching_rgs, indices)); + + int64_t sum = 0; + std::shared_ptr batch; + while (batch_reader->ReadNext(&batch).ok() && batch) { + auto id1_arr = std::static_pointer_cast( + batch->GetColumnByName("id1")); + auto s1_arr = std::static_pointer_cast( + batch->GetColumnByName("s1")); + for (int64_t i = 0; i < batch->num_rows(); ++i) { + if (!id1_arr->IsNull(i) && + id1_arr->GetString(i) == kFilterDevice && + !s1_arr->IsNull(i)) { + sum += s1_arr->Value(i); + } + } + } + return sum; + } catch (const std::exception& e) { + std::cerr << "parquet tag filter: " << e.what() << "\n"; + return -1; + } +} + +// ─── Scenario 2: Time Range Filter ─────────────────────────────────────────── + +// TsFile query(start, end) is inclusive on both sides: [start, end]. +// Pass (ts_end - 1) to match Parquet's half-open [ts_start, ts_end) semantics. 
+int64_t tsfile_time_filter(const std::string& path, int64_t ts_start, + int64_t ts_end) { + storage::libtsfile_init(); + storage::TsFileReader reader; + BENCH_CHECK_RET_NEG1(reader.open(path)); + + storage::ResultSet* rs = nullptr; + BENCH_CHECK_RET_NEG1( + reader.query(kTable, kReadCols, ts_start, ts_end - 1, rs, nullptr)); + + int64_t sum = 0; + bool has_next = false; + int ret = common::E_OK; + while (IS_SUCC(ret = rs->next(has_next)) && has_next) { + if (!rs->is_null("s1")) sum += rs->get_value("s1"); + } + rs->close(); + reader.close(); + return sum; +} + +int64_t parquet_time_filter(const std::string& path, int64_t ts_start, + int64_t ts_end) { + try { + std::vector cols{"time", "id1", "id2", "s1", + "s2", "s3", "s4"}; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + PARQUET_ASSIGN_OR_THROW(auto infile, + arrow::io::ReadableFile::Open(path)); + PARQUET_ASSIGN_OR_THROW( + std::unique_ptr reader, + parquet::arrow::OpenFile(infile, pool)); + + std::shared_ptr file_schema; + PARQUET_THROW_NOT_OK(reader->GetSchema(&file_schema)); + std::vector indices; + for (const auto& name : cols) + indices.push_back(file_schema->GetFieldIndex(name)); + + // Row group pruning via min/max statistics on time column. 
+ auto& meta = *reader->parquet_reader()->metadata(); + int time_col = meta.schema()->ColumnIndex("time"); + auto matching_rgs = + rg_prune_time_range(meta, time_col, ts_start, ts_end); + + PARQUET_ASSIGN_OR_THROW(auto batch_reader, reader->GetRecordBatchReader( + matching_rgs, indices)); + + int64_t sum = 0; + std::shared_ptr batch; + while (batch_reader->ReadNext(&batch).ok() && batch) { + auto time_arr = std::static_pointer_cast( + batch->GetColumnByName("time")); + auto s1_arr = std::static_pointer_cast( + batch->GetColumnByName("s1")); + for (int64_t i = 0; i < batch->num_rows(); ++i) { + int64_t t = time_arr->Value(i); + if (t >= ts_start && t < ts_end && !s1_arr->IsNull(i)) + sum += s1_arr->Value(i); + } + } + return sum; + } catch (const std::exception& e) { + std::cerr << "parquet time filter: " << e.what() << "\n"; + return -1; + } +} + +// ─── Optimized: Batch columnar read ────────────────────────────────────────── + +// Find the 0-based TsBlock vector index for a named column. +// ResultSetMetadata prepends "time" as column 1 (1-indexed), so +// TsBlock vector index = metadata column index - 1. +static int find_vec_idx(storage::ResultSet* rs, const std::string& name) { + auto meta = rs->get_metadata(); + for (int i = 1; i <= static_cast(meta->get_column_count()); ++i) { + if (meta->get_column_name(i) == name) return i - 1; + } + return -1; +} + +// Sum all INT64 values in a Vector, using direct buffer access for the +// common no-null case to avoid per-element overhead. +static int64_t sum_vec_int64(common::Vector* vec, uint32_t rows) { + int64_t sum = 0; + if (!vec->has_null()) { + // Fast path: dense int64_t array, single pointer scan. + const int64_t* p = + reinterpret_cast(vec->get_value_data().get_data()); + for (uint32_t r = 0; r < rows; ++r) sum += p[r]; + } else { + // Slow path: skip null rows; advance sequential cursor manually. 
+ vec->reset_offset(); + for (uint32_t r = 0; r < rows; ++r) { + if (!vec->is_null(r)) { + uint32_t len = 0; + bool null = false; + char* val = vec->read(&len, &null, r); + sum += *reinterpret_cast(val); + vec->update_offset(); + } + } + } + return sum; +} + +// batch_size controls TsBlock capacity; 65536 rows/block matches write batches. +static const int kBatchSize = 65536; + +int64_t tsfile_tag_filter_batch(const std::string& path, int64_t row_count) { + storage::libtsfile_init(); + storage::TsFileReader reader; + BENCH_CHECK_RET_NEG1(reader.open(path)); + + auto table_schema = reader.get_table_schema(std::string(kTable)); + storage::Filter* tag_filter = + storage::TagFilterBuilder(table_schema.get()).eq("id1", kFilterDevice); + + storage::ResultSet* rs = nullptr; + BENCH_CHECK_RET_NEG1(reader.query(kTable, kReadCols, 0, row_count, rs, + tag_filter, kBatchSize)); + + const int s1_idx = find_vec_idx(rs, "s1"); + int64_t sum = 0; + common::TsBlock* block = nullptr; + while (rs->get_next_tsblock(block) == common::E_OK && block) { + sum += sum_vec_int64(block->get_vector(s1_idx), block->get_row_count()); + } + rs->close(); + reader.close(); + delete tag_filter; + return sum; +} + +int64_t tsfile_time_filter_batch(const std::string& path, int64_t ts_start, + int64_t ts_end) { + storage::libtsfile_init(); + storage::TsFileReader reader; + BENCH_CHECK_RET_NEG1(reader.open(path)); + + storage::ResultSet* rs = nullptr; + BENCH_CHECK_RET_NEG1( + reader.query(kTable, kReadCols, ts_start, ts_end - 1, rs, kBatchSize)); + + const int s1_idx = find_vec_idx(rs, "s1"); + int64_t sum = 0; + common::TsBlock* block = nullptr; + while (rs->get_next_tsblock(block) == common::E_OK && block) { + sum += sum_vec_int64(block->get_vector(s1_idx), block->get_row_count()); + } + rs->close(); + reader.close(); + return sum; +} + +} // namespace + +// ─── Entry point ───────────────────────────────────────────────────────────── + +int bench_write(int64_t row_count, bool run_parquet) { + const 
std::string ts_path = "read_perf_bench.tsfile"; + const std::string pq_path = "read_perf_bench.parquet"; + + std::cout << "rows_total=" << row_count << " devices=" << kNumDevices + << " rows_per_device=" << row_count / kNumDevices + << "\ncolumns: time, id1, id2, s1(INT64), s2(DOUBLE)," + " s3(FLOAT), s4(INT32)\ncompression: SNAPPY\n"; + + { + using clock = std::chrono::high_resolution_clock; + auto t0 = clock::now(); + if (write_tsfile(ts_path, row_count) != 0) return 1; + double s = std::chrono::duration(clock::now() - t0).count(); + std::cout << "write TsFile : " << std::fixed << std::setprecision(3) + << s << " s\n"; + } + if (run_parquet) { + using clock = std::chrono::high_resolution_clock; + auto t0 = clock::now(); + if (write_parquet(pq_path, row_count) != 0) return 1; + double s = std::chrono::duration(clock::now() - t0).count(); + std::cout << "write Parquet : " << std::fixed << std::setprecision(3) + << s << " s\n"; + } + std::cout << "\n"; + return 0; +} + +int bench_read(int64_t row_count, bool run_parquet) { + int64_t rows_per_device = row_count / kNumDevices; + // TIME_FILTER: query the first 1/3 of the total time range. + // Timestamps are laid out as [0, row_count) across all devices. 
+ int64_t time_range_start = 0; + int64_t time_range_end = row_count / 3; // ~333K rows for 1M total + int64_t time_result_rows = time_range_end - time_range_start; + + const std::string ts_path = "read_perf_bench.tsfile"; + const std::string pq_path = "read_perf_bench.parquet"; + + std::cout << "\n"; + + using clock = std::chrono::high_resolution_clock; + + // ── Scenario 1: Tag Filter + // ──────────────────────────────────────────────── + std::cout << "[TAG_FILTER] id1=\"" << kFilterDevice + << "\" result_rows=" << rows_per_device << "\n"; + + auto t0 = clock::now(); + int64_t sum_ts_tag_row = tsfile_tag_filter(ts_path, row_count); + double sec_ts_tag_row = + std::chrono::duration(clock::now() - t0).count(); + if (sum_ts_tag_row < 0) return 1; + + auto t1 = clock::now(); + int64_t sum_ts_tag_bat = tsfile_tag_filter_batch(ts_path, row_count); + double sec_ts_tag_bat = + std::chrono::duration(clock::now() - t1).count(); + if (sum_ts_tag_bat < 0) return 1; + + print_result("TsFile (row)", sec_ts_tag_row, rows_per_device, + sum_ts_tag_row); + print_result("TsFile (batch)", sec_ts_tag_bat, rows_per_device, + sum_ts_tag_bat); + if (run_parquet) { + auto t2 = clock::now(); + int64_t sum_pq_tag = parquet_tag_filter(pq_path); + double sec_pq_tag = + std::chrono::duration(clock::now() - t2).count(); + if (sum_pq_tag < 0) return 1; + print_result("Parquet+Arrow", sec_pq_tag, rows_per_device, sum_pq_tag); + if (sum_ts_tag_row != sum_pq_tag || sum_ts_tag_bat != sum_pq_tag) + std::cerr << " warning: tag filter checksum mismatch\n"; + } + std::cout << "\n"; + + // ── Scenario 2: Time Range Filter + // ───────────────────────────────────────── Both TsFile and Parquet query + // the identical half-open interval [time_range_start, time_range_end). + // TsFile query() is inclusive on both ends, so pass (time_range_end - 1) as + // the upper bound. 
+ std::cout << "[TIME_FILTER] time in [" << time_range_start << ", " + << time_range_end << ")" + << " result_rows=" << time_result_rows << "\n"; + + auto t3 = clock::now(); + int64_t sum_ts_time_row = + tsfile_time_filter(ts_path, time_range_start, time_range_end); + double sec_ts_time_row = + std::chrono::duration(clock::now() - t3).count(); + if (sum_ts_time_row < 0) return 1; + + auto t4 = clock::now(); + int64_t sum_ts_time_bat = + tsfile_time_filter_batch(ts_path, time_range_start, time_range_end); + double sec_ts_time_bat = + std::chrono::duration(clock::now() - t4).count(); + if (sum_ts_time_bat < 0) return 1; + + print_result("TsFile (row)", sec_ts_time_row, time_result_rows, + sum_ts_time_row); + print_result("TsFile (batch)", sec_ts_time_bat, time_result_rows, + sum_ts_time_bat); + if (run_parquet) { + auto t5 = clock::now(); + int64_t sum_pq_time = + parquet_time_filter(pq_path, time_range_start, time_range_end); + double sec_pq_time = + std::chrono::duration(clock::now() - t5).count(); + if (sum_pq_time < 0) return 1; + print_result("Parquet+Arrow", sec_pq_time, time_result_rows, + sum_pq_time); + if (sum_ts_time_row != sum_pq_time || sum_ts_time_bat != sum_pq_time) + std::cerr << " warning: time filter checksum mismatch\n"; + } + + return 0; +} diff --git a/cpp/examples/cpp_examples/bench_read.h b/cpp/examples/cpp_examples/bench_read.h new file mode 100644 index 000000000..3e599f751 --- /dev/null +++ b/cpp/examples/cpp_examples/bench_read.h @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#pragma once
+#include <cstdint>
+
+/**
+ * TsFile vs Parquet+Arrow baseline read benchmark.
+ * Writes bench files to cwd, then measures TAG_FILTER and TIME_FILTER.
+ * row_count must be a positive multiple of 10 (default: 1,000,000).
+ */
+// Write TsFile (and optionally Parquet) bench files to cwd.
+int bench_write(int64_t row_count = 1000000, bool run_parquet = true);
+
+// Best-effort OS page cache drop for the bench files.
+// On macOS: calls `purge` (requires sudo; harmless if it fails).
+// On Linux: writes to /proc/sys/vm/drop_caches (requires root).
+void bench_drop_cache();
+
+// Run read benchmarks against already-written bench files.
+// run_parquet: include Parquet+Arrow comparison (set false for TsFile-only
+// profiling).
+int bench_read(int64_t row_count = 1000000, bool run_parquet = true); diff --git a/cpp/examples/examples.cc b/cpp/examples/examples.cc index edbd819a0..d6a0509eb 100644 --- a/cpp/examples/examples.cc +++ b/cpp/examples/examples.cc @@ -18,16 +18,12 @@ */ #include "c_examples/c_examples.h" +#include "cpp_examples/bench_read.h" #include "cpp_examples/cpp_examples.h" int main() { // C++ examples - // std::cout << "begin write and read tsfile by cpp" << std::endl; demo_write(); demo_read(); - std::cout << "begin write and read tsfile by c" << std::endl; - // C examples - write_tsfile(); - read_tsfile(); return 0; -} \ No newline at end of file +} diff --git a/cpp/examples/read_perf_compare/CMakeLists.txt b/cpp/examples/read_perf_compare/CMakeLists.txt new file mode 100644 index 000000000..8b5dd6cc2 --- /dev/null +++ b/cpp/examples/read_perf_compare/CMakeLists.txt @@ -0,0 +1,23 @@ +#[[ +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +]] + +# bench_read.cpp and bench_read.h live here for organisation. +# The parent examples/CMakeLists.txt is responsible for compiling +# bench_read.cpp into the single `examples` executable. +# No separate executable is built from this directory. 
diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 93342c113..1bd823985 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -37,6 +37,9 @@ message("cmake using: ENABLE_LZOKAY=${ENABLE_LZOKAY}") option(ENABLE_ZLIB "Enable Zlib compression" ON) message("cmake using: ENABLE_ZLIB=${ENABLE_ZLIB}") +# ENABLE_SIMD is defined in the top-level CMakeLists.txt +message("cmake using: ENABLE_SIMD=${ENABLE_SIMD}") + message("Running in src directory") if (${COV_ENABLED}) add_compile_options(-fprofile-arcs -ftest-coverage) @@ -75,10 +78,7 @@ endif() if (ENABLE_ZLIB) list(APPEND PROJECT_INCLUDE_DIR - # zlib.h ships in the source tree; zconf.h is generated into the - # build tree by zlib's own CMake, so both directories are needed. - ${CMAKE_SOURCE_DIR}/third_party/zlib-1.3.1 - ${THIRD_PARTY_INCLUDE}/zlib-1.3.1 + ${CMAKE_SOURCE_DIR}/third_party/zlib-1.2.13 ) endif() @@ -89,12 +89,14 @@ if (ENABLE_ANTLR4) message("Adding ANTLR4 include directory") endif() -include_directories(${PROJECT_INCLUDE_DIR}) +if (ENABLE_SIMD) + add_definitions(-DENABLE_SIMD) + list(APPEND PROJECT_INCLUDE_DIR + ${CMAKE_SOURCE_DIR}/third_party/simde-0.8.4-rc3 + ) +endif() -# Mark every translation unit that is compiled into the tsfile library so that -# TSFILE_API (see utils/util_define.h) resolves to an export-side (empty) -# decoration here, and to __declspec(dllimport) for external consumers. 
-add_definitions(-DTSFILE_BUILDING)
+include_directories(${PROJECT_INCLUDE_DIR})
 
 if (ENABLE_ANTLR4)
     add_subdirectory(parser)
@@ -142,6 +144,8 @@ target_link_libraries(write_obj ${COMPRESSION_LIBS})
 
 add_library(tsfile SHARED)
 
+find_package(Threads REQUIRED)
+
 if (${COV_ENABLED})
     message("Enable code cov...")
     if (ENABLE_ANTLR4)
@@ -158,6 +162,7 @@ else()
     endif()
 endif()
 
+target_link_libraries(tsfile Threads::Threads)
 add_dependencies(tsfile utils_obj encoding_obj)
 
 set(LIBTSFILE_PROJECT_VERSION ${TsFile_CPP_VERSION})
@@ -165,10 +170,10 @@ set(LIBTSFILE_SO_VERSION ${TsFile_CPP_VERSION})
 set_target_properties(tsfile PROPERTIES VERSION ${LIBTSFILE_PROJECT_VERSION})
 set_target_properties(tsfile PROPERTIES SOVERSION ${LIBTSFILE_SO_VERSION})
 
 # On Windows a SHARED library produces a .dll (RUNTIME) plus an import .lib
 # (ARCHIVE); on Unix it produces a .so (LIBRARY). Cover all three so the
 # install step works for every platform.
 install(TARGETS tsfile
         RUNTIME DESTINATION ${LIBRARY_OUTPUT_PATH}
         LIBRARY DESTINATION ${LIBRARY_OUTPUT_PATH}
-        ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH})
\ No newline at end of file
+        ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH})
diff --git a/cpp/src/common/CMakeLists.txt b/cpp/src/common/CMakeLists.txt
index 4406cb219..7ac55ab5c 100644
--- a/cpp/src/common/CMakeLists.txt
+++ b/cpp/src/common/CMakeLists.txt
@@ -33,10 +33,6 @@ add_library(common_obj OBJECT ${common_SRC_LIST}
         ${common_mutex_SRC_LIST}
         ${common_datatype_SRC_LIST})
 
-if (ENABLE_ANTLR4)
-    target_compile_definitions(common_obj PRIVATE ENABLE_ANTLR4)
-endif()
-
 # install header files recursively
 file(GLOB_RECURSE HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
 copy_to_dir(${HEADERS} "common_obj")
\ No newline at end of file
diff --git a/cpp/src/common/allocator/alloc_base.h b/cpp/src/common/allocator/alloc_base.h
index c89aed077..a2e3037e4 100644
--- a/cpp/src/common/allocator/alloc_base.h
+++ b/cpp/src/common/allocator/alloc_base.h
@@ -59,7 
+59,7 @@ enum AllocModID {
     __MAX_MOD_ID = 127,
 };
 
-extern TSFILE_API const char* g_mod_names[__LAST_MOD_ID];
+extern const char* g_mod_names[__LAST_MOD_ID];
 
 /* very basic alloc/free interface in C style */
 void* mem_alloc(uint32_t size, AllocModID mid);
@@ -82,35 +82,43 @@ class ModStat {
     }
     void init();
     void destroy();
-    INLINE void update_alloc(AllocModID mid, int32_t size) {
+    INLINE void update_alloc(AllocModID mid, int64_t size) {
 #ifdef ENABLE_MEM_STAT
         ASSERT(mid < __LAST_MOD_ID);
         ATOMIC_FAA(get_item(mid), size);
 #endif
     }
-    void update_free(AllocModID mid, uint32_t size) {
+    void update_free(AllocModID mid, uint64_t size) {
 #ifdef ENABLE_MEM_STAT
         ASSERT(mid < __LAST_MOD_ID);
-        ATOMIC_FAA(get_item(mid), 0 - size);
+        ATOMIC_FAA(get_item(mid), -static_cast<int64_t>(size));
 #endif
     }
     void print_stat();
 
+    int64_t get_stat(int8_t mid) {  // 0 when stats are off or mid is invalid
+#ifdef ENABLE_MEM_STAT
+        if (stat_arr_ != NULL && mid >= 0 && mid < __LAST_MOD_ID)
+            return ATOMIC_FAA(get_item(mid), 0LL);
+#endif
+        return 0;
+    }
+
 #ifdef ENABLE_TEST
-    int32_t TEST_get_stat(int8_t mid) { return ATOMIC_FAA(get_item(mid), 0); }
+    int64_t TEST_get_stat(int8_t mid) { return ATOMIC_FAA(get_item(mid), 0LL); }
 #endif
 
    private:
-    INLINE int32_t* get_item(int8_t mid) {
-        return &(stat_arr_[mid * (ITEM_SIZE / sizeof(int32_t))]);
+    INLINE int64_t* get_item(int8_t mid) {
+        return &(stat_arr_[mid * (ITEM_SIZE / sizeof(int64_t))]);
     }
 
    private:
     static const int32_t ITEM_SIZE = CACHE_LINE_SIZE;
     static const int32_t ITEM_COUNT = __LAST_MOD_ID;
-    int32_t* stat_arr_;
+    int64_t* stat_arr_;
 
-    STATIC_ASSERT((ITEM_SIZE % sizeof(int32_t) == 0), ModStat_ITEM_SIZE_ERROR);
+    STATIC_ASSERT((ITEM_SIZE % sizeof(int64_t) == 0), ModStat_ITEM_SIZE_ERROR);
 };
 
 /* base allocator */
@@ -120,7 +128,7 @@ class BaseAllocator {
     void free(void* ptr) { mem_free(ptr); }
 };
 
-extern TSFILE_API BaseAllocator g_base_allocator;
+extern BaseAllocator g_base_allocator;
 
 }  // end namespace common
 
diff --git a/cpp/src/common/allocator/byte_stream.h b/cpp/src/common/allocator/byte_stream.h
index 6d08744c3..c944b9875 100644
--- a/cpp/src/common/allocator/byte_stream.h
+++ b/cpp/src/common/allocator/byte_stream.h
@@ -253,6 +253,8 @@ class ByteStream {
     };
 
    public:
+    static const uint32_t DEFAULT_PAGE_SIZE = 1024;
+
     ByteStream(uint32_t page_size, AllocModID mid, bool enable_atomic = false,
                BaseAllocator& allocator = g_base_allocator)
         : allocator_(allocator),
@@ -263,10 +265,9 @@ class ByteStream {
           read_pos_(0),
           marked_read_pos_(0),
           page_size_(page_size),
+          page_mask_(page_size - 1),  // page_size must be a power of 2
           mid_(mid),
-          wrapped_page_(false, nullptr) {
-        // assert(page_size >= 16);  // commented out by gxh on 2023.03.09
-    }
+          wrapped_page_(false, nullptr) { ASSERT(!(page_size & page_mask_)); }
 
     // for wrap plain buffer to ByteStream
     ByteStream(AllocModID mid = MOD_DEFAULT)
@@ -278,6 +279,7 @@ class ByteStream {
           read_pos_(0),
           marked_read_pos_(0),
           page_size_(0),
+          page_mask_(0),
           mid_(mid),
           wrapped_page_(false, nullptr) {}
 
@@ -290,7 +292,14 @@ class ByteStream {
         wrapped_page_.next_.store(nullptr);
         wrapped_page_.buf_ = (uint8_t*)buf;
 
-        page_size_ = buf_len;
+        // page_mask_ is used as a bitmask and only works correctly for
+        // power-of-2 page sizes. Round up to the next power-of-2 so that
+        // (read_pos_ & page_mask_) gives the correct within-page offset and
+        // the page-crossing check doesn't misfire on arbitrary buffer sizes.
+ uint32_t ps = 1; + while (ps < (uint32_t)buf_len) ps <<= 1; + page_size_ = ps; + page_mask_ = ps - 1; head_.store(&wrapped_page_); tail_.store(&wrapped_page_); total_size_.store(buf_len); @@ -339,13 +348,14 @@ class ByteStream { // never used TODO void shallow_clone_from(ByteStream& other) { this->page_size_ = other.page_size_; + this->page_mask_ = other.page_mask_; this->mid_ = other.mid_; this->head_.store(other.head_.load()); this->tail_.store(other.tail_.load()); this->total_size_.store(other.total_size_.load()); } - FORCE_INLINE uint32_t total_size() const { return total_size_.load(); } + FORCE_INLINE uint64_t total_size() const { return total_size_.load(); } FORCE_INLINE uint32_t read_pos() const { return read_pos_; }; FORCE_INLINE void wrapped_buf_advance_read_pos(uint32_t size) { if (size + read_pos_ > total_size_.load()) { @@ -365,10 +375,10 @@ class ByteStream { std::cout << "write_buf error " << ret << std::endl; return ret; } - uint32_t remainder = page_size_ - (total_size_.load() % page_size_); + uint32_t remainder = page_size_ - (total_size_.load() & page_mask_); uint32_t copy_len = remainder < (len - write_len) ? remainder : (len - write_len); - memcpy(tail_.load()->buf_ + total_size_.load() % page_size_, + memcpy(tail_.load()->buf_ + (total_size_.load() & page_mask_), buf + write_len, copy_len); total_size_.atomic_aaf(copy_len); write_len += copy_len; @@ -389,11 +399,11 @@ class ByteStream { if (RET_FAIL(check_space())) { return ret; } - uint32_t remainder = page_size_ - (read_pos_ % page_size_); + uint32_t remainder = page_size_ - (read_pos_ & page_mask_); uint32_t copy_len = remainder < want_len_limited - read_len ? 
remainder : want_len_limited - read_len; - memcpy(buf + read_len, read_page_->buf_ + (read_pos_ % page_size_), + memcpy(buf + read_len, read_page_->buf_ + (read_pos_ & page_mask_), copy_len); read_len += copy_len; read_pos_ += copy_len; @@ -445,16 +455,17 @@ class ByteStream { return b; } b.buf_ = - (char*)(tail_.load()->buf_ + (total_size_.load() % page_size_)); - b.len_ = page_size_ - (total_size_.load() % page_size_); + (char*)(tail_.load()->buf_ + (total_size_.load() & page_mask_)); + b.len_ = page_size_ - (total_size_.load() & page_mask_); return b; } void buffer_used(uint32_t used_bytes) { ASSERT(used_bytes >= 1); // would not span page - ASSERT((total_size_.load() / page_size_) == - ((total_size_.load() + used_bytes - 1) / page_size_)); + ASSERT(page_size_ == 0 || + (total_size_.load() / page_size_) == + ((total_size_.load() + used_bytes - 1) / page_size_)); total_size_.atomic_aaf(used_bytes); } @@ -470,7 +481,7 @@ class ByteStream { if (RET_FAIL(prepare_space())) { return ret; } - uint32_t remainder = page_size_ - (total_size_.load() % page_size_); + uint32_t remainder = page_size_ - (total_size_.load() & page_mask_); uint32_t step = remainder < (len - advanced) ? 
remainder : (len - advanced); total_size_.atomic_aaf(step); @@ -500,8 +511,8 @@ class ByteStream { if (cur_ != nullptr) { b.buf_ = (char*)cur_->buf_; if (cur_ == end_ && - host_.total_size_.load() % host_.page_size_ != 0) { - b.len_ = host_.total_size_.load() % host_.page_size_; + (host_.total_size_.load() & host_.page_mask_) != 0) { + b.len_ = host_.total_size_.load() & host_.page_mask_; } else { b.len_ = host_.page_size_; } @@ -547,7 +558,7 @@ class ByteStream { // get tail position atomically Page* host_end = nullptr; - uint32_t host_total_size = 0; + uint64_t host_total_size = 0; while (true) { host_end = host_.tail_.load(); host_total_size = host_.total_size_.load(); @@ -558,7 +569,7 @@ class ByteStream { while (true) { if (cur_ == host_end) { - if (host_total_size % host_.page_size_ == 0) { + if ((host_total_size & host_.page_mask_) == 0) { if (read_offset_within_cur_page_ == host_.page_size_) { return b; } else { @@ -572,15 +583,15 @@ class ByteStream { } } else { if (read_offset_within_cur_page_ == - (host_total_size % host_.page_size_)) { + (host_total_size & host_.page_mask_)) { return b; } else { b.buf_ = ((char*)(cur_->buf_)) + read_offset_within_cur_page_; - b.len_ = (host_total_size % host_.page_size_) - + b.len_ = (host_total_size & host_.page_mask_) - read_offset_within_cur_page_; read_offset_within_cur_page_ = - (host_total_size % host_.page_size_); + (host_total_size & host_.page_mask_); total_end_offset_ += b.len_; return b; } @@ -610,7 +621,7 @@ class ByteStream { FORCE_INLINE int prepare_space() { int ret = common::E_OK; if (UNLIKELY(tail_.load() == nullptr || - total_size_.load() % page_size_ == 0)) { + (total_size_.load() & page_mask_) == 0)) { Page* p = nullptr; if (RET_FAIL(alloc_page(p))) { return ret; @@ -627,7 +638,7 @@ class ByteStream { } if (UNLIKELY(read_page_ == nullptr)) { read_page_ = head_.load(); - } else if (UNLIKELY(read_pos_ % page_size_ == 0)) { + } else if (UNLIKELY((read_pos_ & page_mask_) == 0)) { read_page_ = 
read_page_->next_.load(); } if (UNLIKELY(read_page_ == nullptr)) { @@ -663,10 +674,11 @@ class ByteStream { OptionalAtomic head_; OptionalAtomic tail_; Page* read_page_; // only one thread is allow to reader this ByteStream - OptionalAtomic total_size_; // total size in byte + OptionalAtomic total_size_; // total size in byte uint32_t read_pos_; // current reader position uint32_t marked_read_pos_; // current reader position uint32_t page_size_; + uint32_t page_mask_; // page_size_ - 1, for bitwise AND instead of modulo AllocModID mid_; public: @@ -1166,6 +1178,7 @@ class SerializationUtil { // indicates that memory has been allocated and must be freed. FORCE_INLINE static int read_var_char_ptr(std::string*& str, ByteStream& in) { + str = nullptr; int ret = common::E_OK; int32_t len = 0; int32_t read_len = 0; @@ -1173,7 +1186,6 @@ class SerializationUtil { return ret; } else { if (len == storage::NO_STR_TO_READ) { - str = nullptr; return ret; } else { char* tmp_buf = diff --git a/cpp/src/common/allocator/mem_alloc.cc b/cpp/src/common/allocator/mem_alloc.cc index 524287e75..b7c5c09c1 100644 --- a/cpp/src/common/allocator/mem_alloc.cc +++ b/cpp/src/common/allocator/mem_alloc.cc @@ -95,7 +95,7 @@ void* mem_alloc(uint32_t size, AllocModID mid) { auto high4b = static_cast(header >> 32); *reinterpret_cast(raw) = high4b; *reinterpret_cast(raw + 4) = low4b; - ModStat::get_instance().update_alloc(mid, static_cast(size)); + ModStat::get_instance().update_alloc(mid, static_cast(size)); return raw + header_size; } @@ -158,7 +158,7 @@ void* mem_realloc(void* ptr, uint32_t size) { *reinterpret_cast(p) = high4b; *reinterpret_cast(p + 4) = low4b; ModStat::get_instance().update_alloc( - mid, int32_t(size) - int32_t(original_size)); + mid, int64_t(size) - int64_t(original_size)); return p + ALIGNMENT; } @@ -166,9 +166,9 @@ void ModStat::init() { if (stat_arr_ != NULL) { return; } - stat_arr_ = (int32_t*)(::malloc(ITEM_SIZE * ITEM_COUNT)); + stat_arr_ = (int64_t*)(::malloc(ITEM_SIZE 
* ITEM_COUNT)); for (int8_t i = 0; i < __LAST_MOD_ID; i++) { - int32_t* item = get_item(i); + int64_t* item = get_item(i); *item = 0; } } @@ -183,14 +183,14 @@ void ModStat::print_stat() { struct Entry { const char* name; - int32_t val; + int64_t val; }; Entry entries[__LAST_MOD_ID]; int count = 0; int64_t total = 0; for (int i = 0; i < __LAST_MOD_ID; i++) { - int32_t val = ATOMIC_FAA(get_item(i), 0); + int64_t val = ATOMIC_FAA(get_item(i), 0LL); total += val; if (val != 0) { entries[count++] = {g_mod_names[i], val}; diff --git a/cpp/src/common/allocator/my_string.h b/cpp/src/common/allocator/my_string.h index ce8a6b229..279ee798e 100644 --- a/cpp/src/common/allocator/my_string.h +++ b/cpp/src/common/allocator/my_string.h @@ -21,7 +21,6 @@ #include -#include #include #include "common/allocator/page_arena.h" diff --git a/cpp/src/common/allocator/page_arena.h b/cpp/src/common/allocator/page_arena.h index 9b8ce5ef6..c0dfbebb9 100644 --- a/cpp/src/common/allocator/page_arena.h +++ b/cpp/src/common/allocator/page_arena.h @@ -47,6 +47,19 @@ class PageArena { FORCE_INLINE void destroy() { reset(); } void reset(); + // Returns the number of bytes actually consumed across all pages. + // This is the precise M_meta size: metadata structs are not data-encoded, + // so arena used bytes == metadata memory exactly. 
+ int64_t get_total_used_bytes() const { + int64_t total = 0; + Page* p = dummy_head_.next_; + while (p) { + total += p->cur_alloc_ - reinterpret_cast(p + 1); + p = p->next_; + } + return total; + } + #ifdef ENABLE_TEST int TEST_get_page_count() const { int count = 0; diff --git a/cpp/src/common/config/config.h b/cpp/src/common/config/config.h index e2b2039a7..e088db3d1 100644 --- a/cpp/src/common/config/config.h +++ b/cpp/src/common/config/config.h @@ -36,7 +36,7 @@ typedef struct ConfigValue { TSEncoding time_encoding_type_; TSDataType time_data_type_; CompressionType time_compress_type_; - int32_t chunk_group_size_threshold_; + int64_t chunk_group_size_threshold_; int32_t record_count_for_next_mem_check_; bool encrypt_flag_ = false; TSEncoding boolean_encoding_type_; @@ -46,14 +46,11 @@ typedef struct ConfigValue { TSEncoding double_encoding_type_; TSEncoding string_encoding_type_; CompressionType default_compression_type_; + bool parallel_read_enabled_; bool parallel_write_enabled_; + int32_t read_thread_count_; int32_t write_thread_count_; - // When true, aligned writer enforces page size limit strictly by - // interleaving time/value writes and sealing pages together when any side - // becomes full. - // When false, aligned writer may disable some page-size checks to improve - // write performance. 
- bool strict_page_size_ = true; + bool sync_on_close_ = true; } ConfigValue; extern void init_config_value(); @@ -65,7 +62,6 @@ extern void set_config_value(); extern void config_set_page_max_point_count(uint32_t page_max_point_count); extern void config_set_max_degree_of_index_node( uint32_t max_degree_of_index_node); -extern void config_set_strict_page_size(bool strict_page_size); } // namespace common diff --git a/cpp/src/common/constant/tsfile_constant.h b/cpp/src/common/constant/tsfile_constant.h index 01ad1b2bc..096c645ab 100644 --- a/cpp/src/common/constant/tsfile_constant.h +++ b/cpp/src/common/constant/tsfile_constant.h @@ -41,14 +41,9 @@ static const std::string TIME_COLUMN_NAME = "time"; static const unsigned char VALUE_COLUMN_MASK = 0x40; static const int NO_STR_TO_READ = -1; -// Identifier characters: ASCII alphanumerics / underscore plus any non-ASCII -// (high) byte. The \x80-\xFF byte range covers UTF-8 lead/continuation bytes -// (e.g. CJK characters) and is accepted by every std::regex implementation; -// the \uXXXX codepoint-escape form used previously is rejected by MSVC's -// (regex_error: error_escape). 
-static const std::regex IDENTIFIER_PATTERN("([a-zA-Z0-9_\\x80-\\xFF]+)");
+static const std::regex IDENTIFIER_PATTERN("([a-zA-Z0-9_\\x80-\\xFF]+)");
 static const std::regex NODE_NAME_PATTERN(
-    "(\\*{0,2}[a-zA-Z0-9_\\x80-\\xFF]+\\*{0,2})");
+    "(\\*{0,2}[a-zA-Z0-9_\\x80-\\xFF]+\\*{0,2})");
 static const int DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME = 3;
 
 }  // namespace storage
diff --git a/cpp/src/common/container/bit_map.cc b/cpp/src/common/container/bit_map.cc
index 407605e56..a855d55ad 100644
--- a/cpp/src/common/container/bit_map.cc
+++ b/cpp/src/common/container/bit_map.cc
@@ -31,9 +31,9 @@ BitMap::~BitMap() {
     }
 }
 
-int BitMap::init(uint32_t item_size, bool init_as_zero) {
+int BitMap::init(uint32_t item_size, bool init_as_zero, AllocModID mod_id) {
     uint32_t size = (item_size + 7) / 8;
-    bitmap_ = static_cast<char*>(mem_alloc(size, MOD_TSBLOCK));
+    bitmap_ = static_cast<char*>(mem_alloc(size, mod_id));
     // need set to 0, otherwise there will be wrong data
     const char initial_char = init_as_zero ? 0x00 : 0xFF;
     memset(bitmap_, initial_char, size);
diff --git a/cpp/src/common/container/bit_map.h b/cpp/src/common/container/bit_map.h
index 757ab1fb1..9cb030316 100644
--- a/cpp/src/common/container/bit_map.h
+++ b/cpp/src/common/container/bit_map.h
@@ -21,89 +21,27 @@
 
 #include 
 
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-
+#include "common/allocator/alloc_base.h"
 #include "utils/errno_define.h"
 #include "utils/util_define.h"
 
 namespace common {
 
-// Cross-platform bit-twiddling helpers. GCC/Clang use their builtins; MSVC
-// uses the equivalent intrinsics from <intrin.h>; any other compiler falls
-// back to a portable loop.
-namespace bitops {
-// Population count of an 8-bit value.
-FORCE_INLINE int popcount8(uint8_t v) {
-#if defined(__GNUC__) || defined(__clang__)
-    return __builtin_popcount(v);
-#elif defined(_MSC_VER)
-    return static_cast<int>(__popcnt(static_cast<unsigned int>(v)));
-#else
-    int c = 0;
-    while (v) {
-        v = static_cast<uint8_t>(v & (v - 1));
-        ++c;
-    }
-    return c;
-#endif
-}
-// Count trailing zero bits.
The argument must be non-zero. -FORCE_INLINE int ctz_nonzero(uint32_t v) { -#if defined(__GNUC__) || defined(__clang__) - return __builtin_ctz(v); -#elif defined(_MSC_VER) - unsigned long idx; - _BitScanForward(&idx, v); - return static_cast(idx); -#else - int c = 0; - while (!(v & 1u)) { - v >>= 1; - ++c; - } - return c; -#endif -} -// Count trailing zero bits of a 64-bit value. The argument must be non-zero. -FORCE_INLINE int ctz64_nonzero(uint64_t v) { -#if defined(__GNUC__) || defined(__clang__) - return __builtin_ctzll(v); -#elif defined(_MSC_VER) - unsigned long idx; -#if defined(_M_X64) || defined(_M_ARM64) - _BitScanForward64(&idx, v); -#else - // 32-bit MSVC has no _BitScanForward64. - if (static_cast(v) != 0) { - _BitScanForward(&idx, static_cast(v)); - } else { - _BitScanForward(&idx, static_cast(v >> 32)); - idx += 32; - } -#endif - return static_cast(idx); -#else - int c = 0; - while (!(v & 1ull)) { - v >>= 1; - ++c; - } - return c; -#endif -} -} // namespace bitops - class BitMap { public: - BitMap() : bitmap_(nullptr), size_(0), init_as_zero_(true) {} + BitMap() + : bitmap_(nullptr), + size_(0), + init_as_zero_(true), + has_set_bits_(false) {} ~BitMap(); - int init(uint32_t item_size, bool init_as_zero = true); + int init(uint32_t item_size, bool init_as_zero = true, + AllocModID mod_id = MOD_TSBLOCK); FORCE_INLINE void reset() { const char initial_char = init_as_zero_ ? 
0x00 : 0xFF; memset(bitmap_, initial_char, size_); + has_set_bits_ = !init_as_zero_; } FORCE_INLINE void set(uint32_t index) { @@ -113,6 +51,7 @@ class BitMap { char* start_addr = bitmap_ + offset; uint8_t bit_mask = get_bit_mask(index); *start_addr = (*start_addr) | (bit_mask); + has_set_bits_ = true; } FORCE_INLINE void clear(uint32_t index) { @@ -124,7 +63,10 @@ class BitMap { *start_addr = (*start_addr) & (~bit_mask); } - FORCE_INLINE void clear_all() { memset(bitmap_, 0x00, size_); } + FORCE_INLINE void clear_all() { + memset(bitmap_, 0x00, size_); + has_set_bits_ = false; + } FORCE_INLINE bool test(uint32_t index) { uint32_t offset = index >> 3; @@ -135,36 +77,28 @@ class BitMap { return (*start_addr & bit_mask); } - // Count the number of bits set to 1 (i.e., number of null entries). FORCE_INLINE uint32_t count_set_bits() const { uint32_t count = 0; const uint8_t* p = reinterpret_cast(bitmap_); for (uint32_t i = 0; i < size_; i++) { - count += bitops::popcount8(p[i]); + count += __builtin_popcount(p[i]); } return count; } - // Find the next set bit (null position) at or after @from, - // within [0, total_bits). Returns total_bits if none found. - // Skips zero bytes in bulk so cost is proportional to the number - // of null bytes, not total rows. 
FORCE_INLINE uint32_t next_set_bit(uint32_t from, uint32_t total_bits) const { if (from >= total_bits) return total_bits; const uint8_t* p = reinterpret_cast(bitmap_); uint32_t byte_idx = from >> 3; - // Check remaining bits in the first (partial) byte uint8_t byte_val = p[byte_idx] >> (from & 7); if (byte_val) { - return from + bitops::ctz_nonzero(byte_val); + return from + __builtin_ctz(byte_val); } - // Scan subsequent full bytes, skipping zeros const uint32_t byte_end = (total_bits + 7) >> 3; for (++byte_idx; byte_idx < byte_end; ++byte_idx) { if (p[byte_idx]) { - uint32_t pos = - (byte_idx << 3) + bitops::ctz_nonzero(p[byte_idx]); + uint32_t pos = (byte_idx << 3) + __builtin_ctz(p[byte_idx]); return pos < total_bits ? pos : total_bits; } } @@ -175,6 +109,10 @@ class BitMap { FORCE_INLINE char* get_bitmap() { return bitmap_; } + // Fast check: returns false only when guaranteed no bits are set. + // May return true even when no bits are actually set (conservative). + FORCE_INLINE bool may_have_set_bits() const { return has_set_bits_; } + private: FORCE_INLINE uint8_t get_bit_mask(uint32_t index) { return 1 << (index & 7); @@ -184,6 +122,7 @@ class BitMap { char* bitmap_; uint32_t size_; bool init_as_zero_; + bool has_set_bits_; }; } // namespace common diff --git a/cpp/src/common/container/blocking_queue.cc b/cpp/src/common/container/blocking_queue.cc new file mode 100644 index 000000000..c843cd283 --- /dev/null +++ b/cpp/src/common/container/blocking_queue.cc @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "blocking_queue.h" + +namespace common { + +BlockingQueue::BlockingQueue() : queue_(), mutex_(), cond_() { + pthread_mutex_init(&mutex_, NULL); + pthread_cond_init(&cond_, NULL); +} + +BlockingQueue::~BlockingQueue() { + pthread_mutex_destroy(&mutex_); + pthread_cond_destroy(&cond_); +} + +void BlockingQueue::push(void* data) { + pthread_mutex_lock(&mutex_); + queue_.push(data); + pthread_mutex_unlock(&mutex_); + /* + * it is safe to signal after unlock. + * since pthread_cond_wait is guarantee to unlock and sleep atomically. + */ + pthread_cond_signal(&cond_); +} + +void* BlockingQueue::pop() { + void* ret_data = NULL; + pthread_mutex_lock(&mutex_); + while (queue_.empty()) { + pthread_cond_wait(&cond_, &mutex_); + } + ret_data = queue_.front(); + queue_.pop(); + pthread_mutex_unlock(&mutex_); + return ret_data; +} + +} // end namespace common \ No newline at end of file diff --git a/cpp/src/common/container/blocking_queue.h b/cpp/src/common/container/blocking_queue.h new file mode 100644 index 000000000..e03ea3a46 --- /dev/null +++ b/cpp/src/common/container/blocking_queue.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#ifndef COMMON_CONTAINER_BLOCKING_QUEUE_H +#define COMMON_CONTAINER_BLOCKING_QUEUE_H + +#include + +#include + +namespace common { + +class BlockingQueue { + public: + BlockingQueue(); + ~BlockingQueue(); + + void push(void* data); + // if empty, blocking + void* pop(); + + private: + std::queue queue_; + pthread_mutex_t mutex_; + pthread_cond_t cond_; +}; + +} // end namespace common +#endif // COMMON_CONTAINER_BLOCKING_QUEUE_H diff --git a/cpp/src/common/container/byte_buffer.h b/cpp/src/common/container/byte_buffer.h index 88006dac6..4e2dfab15 100644 --- a/cpp/src/common/container/byte_buffer.h +++ b/cpp/src/common/container/byte_buffer.h @@ -107,11 +107,11 @@ class ByteBuffer { // for variable len value FORCE_INLINE char* read(uint32_t offset, uint32_t* len) { + ASSERT(offset + variable_type_len_ <= real_data_size_); uint32_t tmp; - // Directly memcpy to avoid potential alignment issues when casting - // int32_t array pointer std::memcpy(&tmp, data_ + offset, sizeof(tmp)); *len = tmp; + ASSERT(offset + variable_type_len_ + *len <= real_data_size_); char* p = &data_[offset + variable_type_len_]; return p; } @@ -128,4 +128,4 @@ class ByteBuffer { }; } // namespace common -#endif // COMMON_CONTAINER_BYTE_BUFFER_H \ No newline at end of file +#endif // COMMON_CONTAINER_BYTE_BUFFER_H diff --git a/cpp/src/common/db_common.h b/cpp/src/common/db_common.h index a2700c61c..8c637c3da 100644 --- a/cpp/src/common/db_common.h +++ b/cpp/src/common/db_common.h @@ -93,9 +93,9 @@ enum CompressionType : uint8_t { INVALID_COMPRESSION = 255 }; -extern 
TSFILE_API const char* s_data_type_names[8]; -extern TSFILE_API const char* s_encoding_names[12]; -extern TSFILE_API const char* s_compression_names[8]; +extern const char* s_data_type_names[8]; +extern const char* s_encoding_names[12]; +extern const char* s_compression_names[8]; } // namespace common #if defined(__GLIBCXX__) && (__GNUC__ < 7) diff --git a/cpp/src/common/device_id.cc b/cpp/src/common/device_id.cc index b35a8593f..153305889 100644 --- a/cpp/src/common/device_id.cc +++ b/cpp/src/common/device_id.cc @@ -144,7 +144,7 @@ int StringArrayDeviceID::deserialize(common::ByteStream& read_stream) { segments_.clear(); for (uint32_t i = 0; i < num_segments; ++i) { - std::string* segment; + std::string* segment = nullptr; if (RET_FAIL(common::SerializationUtil::read_var_char_ptr( segment, read_stream))) { delete segment; @@ -209,10 +209,10 @@ std::vector StringArrayDeviceID::split_device_id_string( const std::string& device_id_string) { #ifdef ENABLE_ANTLR4 auto splits = storage::PathNodesGenerator::invokeParser(device_id_string); + return split_device_id_string(splits); #else - auto splits = split_string(device_id_string, '.'); + return split_string(device_id_string, '.'); #endif - return split_device_id_string(splits); } std::vector StringArrayDeviceID::split_device_id_string( diff --git a/cpp/src/common/global.cc b/cpp/src/common/global.cc index a510dbff9..a1ea428cc 100644 --- a/cpp/src/common/global.cc +++ b/cpp/src/common/global.cc @@ -24,20 +24,11 @@ #endif #include -#include - -#ifdef ENABLE_THREADS -#include "common/thread_pool.h" -#endif #include "utils/injection.h" -#include "utils/util_define.h" // strncasecmp and other platform-compat shims namespace common { ColumnSchema g_time_column_schema; -#ifdef ENABLE_THREADS -ThreadPool* g_write_thread_pool_ = nullptr; -#endif ConfigValue g_config_value_; void init_config_value() { @@ -61,22 +52,19 @@ void init_config_value() { g_config_value_.boolean_encoding_type_ = PLAIN; 
g_config_value_.int32_encoding_type_ = TS_2DIFF; g_config_value_.int64_encoding_type_ = TS_2DIFF; - g_config_value_.float_encoding_type_ = GORILLA; - g_config_value_.double_encoding_type_ = GORILLA; + g_config_value_.float_encoding_type_ = PLAIN; + g_config_value_.double_encoding_type_ = PLAIN; g_config_value_.string_encoding_type_ = PLAIN; // Default compression type is LZ4 #ifdef ENABLE_LZ4 - g_config_value_.default_compression_type_ = LZ4; + g_config_value_.default_compression_type_ = SNAPPY; #else g_config_value_.default_compression_type_ = UNCOMPRESSED; #endif - unsigned int hw_cores = std::thread::hardware_concurrency(); - if (hw_cores == 0) hw_cores = 1; // fallback if detection fails - g_config_value_.parallel_write_enabled_ = (hw_cores > 1); - g_config_value_.write_thread_count_ = - static_cast(std::min(hw_cores, 64u)); - // Enforce aligned page size limits strictly by default. - g_config_value_.strict_page_size_ = true; + g_config_value_.parallel_read_enabled_ = true; + g_config_value_.parallel_write_enabled_ = true; + g_config_value_.read_thread_count_ = 4; + g_config_value_.write_thread_count_ = 6; } extern TSEncoding get_value_encoder(TSDataType data_type) { @@ -121,10 +109,6 @@ void config_set_max_degree_of_index_node(uint32_t max_degree_of_index_node) { g_config_value_.max_degree_of_index_node_ = max_degree_of_index_node; } -void config_set_strict_page_size(bool strict_page_size) { - g_config_value_.strict_page_size_ = strict_page_size; -} - void set_config_value() {} const char* s_data_type_names[8] = {"BOOLEAN", "INT32", "INT64", "FLOAT", "DOUBLE", "TEXT", "VECTOR", "STRING"}; @@ -144,15 +128,6 @@ int init_common() { g_time_column_schema.encoding_ = PLAIN; g_time_column_schema.compression_ = UNCOMPRESSED; g_time_column_schema.column_name_ = storage::TIME_COLUMN_NAME; -#ifdef ENABLE_THREADS - // (Re)create the global write thread pool with the configured size. 
- delete g_write_thread_pool_; - size_t pool_size = - g_config_value_.write_thread_count_ > 0 - ? static_cast(g_config_value_.write_thread_count_) - : size_t{1}; - g_write_thread_pool_ = new ThreadPool(pool_size); -#endif return ret; } diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index 5bee0fa60..985ddeab4 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -26,8 +26,8 @@ #include "common/config/config.h" namespace common { -extern TSFILE_API ConfigValue g_config_value_; -extern TSFILE_API ColumnSchema g_time_column_schema; +extern ConfigValue g_config_value_; +extern ColumnSchema g_time_column_schema; FORCE_INLINE int set_global_time_data_type(uint8_t data_type) { ASSERT(data_type >= BOOLEAN && data_type <= STRING); @@ -163,30 +163,34 @@ FORCE_INLINE uint8_t get_global_compression() { return static_cast(g_config_value_.default_compression_type_); } +FORCE_INLINE void set_parallel_read_enabled(bool enabled) { + g_config_value_.parallel_read_enabled_ = enabled; +} + +FORCE_INLINE bool get_parallel_read_enabled() { + return g_config_value_.parallel_read_enabled_; +} + FORCE_INLINE void set_parallel_write_enabled(bool enabled) { g_config_value_.parallel_write_enabled_ = enabled; } FORCE_INLINE bool get_parallel_write_enabled() { - return g_config_value_.parallel_write_enabled_ && - g_config_value_.write_thread_count_ > 1; + return g_config_value_.parallel_write_enabled_; +} + +FORCE_INLINE int set_read_thread_count(int32_t count) { + if (count < 1 || count > 64) return E_INVALID_ARG; + g_config_value_.read_thread_count_ = count; + return E_OK; } -// Set the number of threads for parallel writes. Must be called before -// init_common() / libtsfile_init() — the global thread pool is created -// during initialization and is not resized at runtime. 
FORCE_INLINE int set_write_thread_count(int32_t count) { if (count < 1 || count > 64) return E_INVALID_ARG; g_config_value_.write_thread_count_ = count; return E_OK; } -#ifdef ENABLE_THREADS -class ThreadPool; -// Global write thread pool, created by init_common(). -extern ThreadPool* g_write_thread_pool_; -#endif - extern int init_common(); extern bool is_timestamp_column_name(const char* time_col_name); extern void cols_to_json(ByteStream* byte_stream, diff --git a/cpp/src/common/mutex/mutex.h b/cpp/src/common/mutex/mutex.h index b35d328de..726288cd9 100644 --- a/cpp/src/common/mutex/mutex.h +++ b/cpp/src/common/mutex/mutex.h @@ -20,28 +20,46 @@ #ifndef COMMON_MUTEX_MUTEX_H #define COMMON_MUTEX_MUTEX_H -#include +#include +#include #include "utils/util_define.h" namespace common { -// Thin wrapper over std::mutex. Implemented with the C++11 standard library -// (instead of pthreads directly) so it builds on every platform, including -// MSVC where pthreads is not available. class Mutex { public: - Mutex() {} - ~Mutex() {} + Mutex() : mutex_() { pthread_mutex_init(&mutex_, NULL); } + ~Mutex() { pthread_mutex_destroy(&mutex_); } - void lock() { mutex_.lock(); } + void lock() { + int ret = EBUSY; + do { + ret = pthread_mutex_lock(&mutex_); + } while (UNLIKELY(ret == EBUSY || ret == EAGAIN)); + ASSERT(ret == 0); + } - void unlock() { mutex_.unlock(); } + void unlock() { + int ret = pthread_mutex_unlock(&mutex_); + ASSERT(ret == 0); + (void)ret; + } - bool try_lock() { return mutex_.try_lock(); } + bool try_lock() { + int ret = pthread_mutex_trylock(&mutex_); + if (ret == 0) { + return true; + } else if (ret == EBUSY || ret == EAGAIN) { + return false; + } else { + ASSERT(false); + return false; + } + } private: - std::mutex mutex_; + pthread_mutex_t mutex_; }; class MutexGuard { diff --git a/cpp/src/common/path.cc b/cpp/src/common/path.cc deleted file mode 100644 index d70a9d6c6..000000000 --- a/cpp/src/common/path.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed 
to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "common/path.h" - -#include "common/constant/tsfile_constant.h" - -#ifdef ENABLE_ANTLR4 -#include "parser/path_nodes_generator.h" -#endif - -namespace storage { - -Path::Path() = default; - -Path::Path(std::string& device, std::string& measurement) - : measurement_(measurement), - device_id_(std::make_shared(device)) { - full_path_ = device + "." + measurement; -} - -Path::Path(const std::string& path_sc, bool if_split) { - if (!path_sc.empty()) { - if (!if_split) { - full_path_ = path_sc; - device_id_ = std::make_shared(path_sc); - } else { -#ifdef ENABLE_ANTLR4 - std::vector nodes = - PathNodesGenerator::invokeParser(path_sc); -#else - std::vector nodes = - IDeviceID::split_string(path_sc, '.'); -#endif - if (nodes.size() > 1) { - // Join nodes, then parse like write path / Java Path (not - // per-segment vector). - std::string device_joined; - for (size_t i = 0; i + 1 < nodes.size(); ++i) { - if (i > 0) { - device_joined += PATH_SEPARATOR_CHAR; - } - device_joined += nodes[i]; - } - device_id_ = - std::make_shared(device_joined); - measurement_ = nodes[nodes.size() - 1]; - full_path_ = device_id_->get_device_name() + "." 
+ measurement_; - } else { - full_path_ = path_sc; - device_id_ = std::make_shared(); - measurement_ = path_sc; - } - } - } else { - full_path_ = ""; - device_id_ = std::make_shared(); - measurement_ = ""; - } -} - -} // namespace storage diff --git a/cpp/src/common/path.h b/cpp/src/common/path.h index 3896b2715..55abf810d 100644 --- a/cpp/src/common/path.h +++ b/cpp/src/common/path.h @@ -22,6 +22,10 @@ #include #include "common/device_id.h" +#ifdef ENABLE_ANTLR4 +#include "parser/generated/PathParser.h" +#include "parser/path_nodes_generator.h" +#endif #include "utils/errno_define.h" namespace storage { @@ -31,9 +35,46 @@ struct Path { std::shared_ptr device_id_; std::string full_path_; - Path(); - Path(std::string& device, std::string& measurement); - Path(const std::string& path_sc, bool if_split = true); + Path() {} + + Path(std::string& device, std::string& measurement) + : measurement_(measurement), + device_id_(std::make_shared(device)) { + full_path_ = device + "." + measurement; + } + + Path(const std::string& path_sc, bool if_split = true) { + if (!path_sc.empty()) { + if (!if_split) { + full_path_ = path_sc; + device_id_ = std::make_shared(path_sc); + } else { +#ifdef ENABLE_ANTLR4 + std::vector nodes = + PathNodesGenerator::invokeParser(path_sc); +#else + std::vector nodes = + IDeviceID::split_string(path_sc, '.'); +#endif + if (nodes.size() > 1) { + device_id_ = std::make_shared( + std::vector(nodes.begin(), + nodes.end() - 1)); + measurement_ = nodes[nodes.size() - 1]; + full_path_ = + device_id_->get_device_name() + "." 
+ measurement_; + } else { + full_path_ = path_sc; + device_id_ = std::make_shared(); + measurement_ = path_sc; + } + } + } else { + full_path_ = ""; + device_id_ = std::make_shared(); + measurement_ = ""; + } + } bool operator==(const Path& path) { if (measurement_.compare(path.measurement_) == 0 && diff --git a/cpp/src/common/schema.h b/cpp/src/common/schema.h index 81008b715..a2c989af2 100644 --- a/cpp/src/common/schema.h +++ b/cpp/src/common/schema.h @@ -23,7 +23,6 @@ #include #include -#include #include // use unordered_map instead #include #include @@ -166,7 +165,6 @@ struct MeasurementSchemaGroup { MeasurementSchemaMap measurement_schema_map_; bool is_aligned_ = false; TimeChunkWriter* time_chunk_writer_ = nullptr; - int64_t last_time_ = INT64_MIN; ~MeasurementSchemaGroup() { if (time_chunk_writer_ != nullptr) { diff --git a/cpp/src/common/statistic.h b/cpp/src/common/statistic.h index bced66173..3d45b4f43 100644 --- a/cpp/src/common/statistic.h +++ b/cpp/src/common/statistic.h @@ -22,12 +22,18 @@ #include +#include #include #include "common/allocator/alloc_base.h" #include "common/allocator/byte_stream.h" #include "common/db_common.h" +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#include +#define TSFILE_HAS_NEON 1 +#endif + namespace storage { /* @@ -176,6 +182,48 @@ class Statistic { } virtual FORCE_INLINE void update(int64_t time) { ASSERT(false); } + virtual void update_time_batch(const int64_t* timestamps, uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i]); + } + } + virtual void update_batch(const int64_t* timestamps, const bool* values, + uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i], values[i]); + } + } + virtual void update_batch(const int64_t* timestamps, const int32_t* values, + uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i], values[i]); + } + } + virtual void update_batch(const int64_t* timestamps, const int64_t* values, + uint32_t count) 
{ + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i], values[i]); + } + } + virtual void update_batch(const int64_t* timestamps, const float* values, + uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i], values[i]); + } + } + virtual void update_batch(const int64_t* timestamps, const double* values, + uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i], values[i]); + } + } + virtual void update_batch(const int64_t* timestamps, + const common::String* values, uint32_t count) { + for (uint32_t i = 0; i < count; i++) { + update(timestamps[i], values[i]); + } + } + virtual int serialize_to(common::ByteStream& out) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_var_uint(count_, out))) { @@ -554,17 +602,17 @@ class BooleanStatistic : public Statistic { last_value_ = that.last_value_; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; sum_value_ = 0; first_value_ = false; last_value_ = false; } - FORCE_INLINE void update(int64_t time, bool value) { + FORCE_INLINE void update(int64_t time, bool value) override { BOOL_STAT_UPDATE(time, value); } - int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_ui8(first_value_ ? 
1 : 0, out))) { @@ -575,7 +623,7 @@ class BooleanStatistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_ui8((uint8_t&)first_value_, in))) { @@ -587,13 +635,15 @@ class BooleanStatistic : public Statistic { return ret; } - FORCE_INLINE common::TSDataType get_type() { return common::BOOLEAN; } + FORCE_INLINE common::TSDataType get_type() override { + return common::BOOLEAN; + } - int merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_BOOL_STAT_FROM(BooleanStatistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_BOOL_STAT_FROM(BooleanStatistic, stat); } }; @@ -625,7 +675,7 @@ class Int32Statistic : public Statistic { last_value_ = that.last_value_; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; sum_value_ = 0; min_value_ = 0; @@ -634,13 +684,41 @@ class Int32Statistic : public Statistic { last_value_ = 0; } - FORCE_INLINE void update(int64_t time, int32_t value) { + FORCE_INLINE void update(int64_t time, int32_t value) override { NUM_STAT_UPDATE(time, value); } - FORCE_INLINE common::TSDataType get_type() { return common::INT32; } + void update_batch(const int64_t* timestamps, const int32_t* values, + uint32_t count) override { + if (count == 0) return; + uint32_t start = 0; + if (count_ == 0) { + start_time_ = timestamps[0]; + end_time_ = timestamps[0]; + first_value_ = values[0]; + last_value_ = values[0]; + min_value_ = values[0]; + max_value_ = values[0]; + sum_value_ = (int64_t)values[0]; + count_ = 1; + start = 1; + } + for (uint32_t i = start; i < count; i++) { + if (timestamps[i] < start_time_) start_time_ = timestamps[i]; + if (timestamps[i] > end_time_) end_time_ = timestamps[i]; + if (values[i] < min_value_) min_value_ = values[i]; + if (values[i] > 
max_value_) max_value_ = values[i]; + sum_value_ += (int64_t)values[i]; + } + last_value_ = values[count - 1]; + count_ += (count - start); + } + + FORCE_INLINE common::TSDataType get_type() override { + return common::INT32; + } - int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_ui32(min_value_, out))) { } else if (RET_FAIL(common::SerializationUtil::write_ui32(max_value_, @@ -654,7 +732,7 @@ class Int32Statistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_ui32((uint32_t&)min_value_, in))) { @@ -676,15 +754,15 @@ class Int32Statistic : public Statistic { // << std::endl; return ret; } - int merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_NUM_STAT_FROM(Int32Statistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_NUM_STAT_FROM(Int32Statistic, stat); } - std::string to_string() const { + std::string to_string() const override { std::ostringstream oss; oss << "{count=" << count_ << ", start_time=" << start_time_ << ", end_time=" << end_time_ << ", first_val=" << first_value_ @@ -696,7 +774,7 @@ class Int32Statistic : public Statistic { }; class DateStatistic : public Int32Statistic { - FORCE_INLINE common::TSDataType get_type() { return common::DATE; } + FORCE_INLINE common::TSDataType get_type() override { return common::DATE; } }; class Int64Statistic : public Statistic { @@ -726,7 +804,7 @@ class Int64Statistic : public Statistic { last_value_ = that.last_value_; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; sum_value_ = 0; min_value_ = 0; @@ -734,13 +812,69 @@ class Int64Statistic : public Statistic { first_value_ = 0; 
last_value_ = 0; } - FORCE_INLINE void update(int64_t time, int64_t value) { + FORCE_INLINE void update(int64_t time, int64_t value) override { NUM_STAT_UPDATE(time, value); } - FORCE_INLINE common::TSDataType get_type() { return common::INT64; } + void update_batch(const int64_t* timestamps, const int64_t* values, + uint32_t count) override { + if (count == 0) return; + uint32_t start = 0; + if (count_ == 0) { + start_time_ = timestamps[0]; + end_time_ = timestamps[0]; + first_value_ = values[0]; + last_value_ = values[0]; + min_value_ = values[0]; + max_value_ = values[0]; + sum_value_ = (double)values[0]; + count_ = 1; + start = 1; + } + // Timestamps are monotonic (verified by TimePageWriter), + // so only first/last matter for start_time_/end_time_. + if (count > start) { + if (timestamps[start] < start_time_) + start_time_ = timestamps[start]; + if (timestamps[count - 1] > end_time_) + end_time_ = timestamps[count - 1]; + } + uint32_t i = start; +#if TSFILE_HAS_NEON + { + int64x2_t vmin = vdupq_n_s64(min_value_); + int64x2_t vmax = vdupq_n_s64(max_value_); + float64x2_t vsum = vdupq_n_f64(0.0); + for (; i + 2 <= count; i += 2) { + int64x2_t v = vld1q_s64(&values[i]); + // min/max via compare+select (no vminq_s64 in NEON) + uint64x2_t lt = vcltq_s64(v, vmin); + vmin = vbslq_s64(lt, v, vmin); + uint64x2_t gt = vcgtq_s64(v, vmax); + vmax = vbslq_s64(gt, v, vmax); + vsum = vaddq_f64(vsum, vcvtq_f64_s64(v)); + } + min_value_ = + std::min(vgetq_lane_s64(vmin, 0), vgetq_lane_s64(vmin, 1)); + max_value_ = + std::max(vgetq_lane_s64(vmax, 0), vgetq_lane_s64(vmax, 1)); + sum_value_ += vgetq_lane_f64(vsum, 0) + vgetq_lane_f64(vsum, 1); + } +#endif + for (; i < count; i++) { + if (values[i] < min_value_) min_value_ = values[i]; + if (values[i] > max_value_) max_value_ = values[i]; + sum_value_ += (double)values[i]; + } + last_value_ = values[count - 1]; + count_ += (count - start); + } + + FORCE_INLINE common::TSDataType get_type() override { + return common::INT64; + } - 
int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_ui64(min_value_, out))) { } else if (RET_FAIL(common::SerializationUtil::write_ui64(max_value_, @@ -754,7 +888,7 @@ class Int64Statistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_ui64((uint64_t&)min_value_, in))) { @@ -769,15 +903,15 @@ class Int64Statistic : public Statistic { } return ret; } - int merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_NUM_STAT_FROM(Int64Statistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_NUM_STAT_FROM(Int64Statistic, stat); } - std::string to_string() const { + std::string to_string() const override { std::ostringstream oss; oss << "{count=" << count_ << ", start_time=" << start_time_ << ", end_time=" << end_time_ << ", first_val=" << first_value_ @@ -815,7 +949,7 @@ class FloatStatistic : public Statistic { last_value_ = that.last_value_; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; sum_value_ = 0; min_value_ = 0; @@ -823,13 +957,15 @@ class FloatStatistic : public Statistic { first_value_ = 0; last_value_ = 0; } - FORCE_INLINE void update(int64_t time, float value) { + FORCE_INLINE void update(int64_t time, float value) override { NUM_STAT_UPDATE(time, value); } - FORCE_INLINE common::TSDataType get_type() { return common::FLOAT; } + FORCE_INLINE common::TSDataType get_type() override { + return common::FLOAT; + } - int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_float(min_value_, out))) { } else if 
(RET_FAIL(common::SerializationUtil::write_float(max_value_, @@ -843,7 +979,7 @@ class FloatStatistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_float(min_value_, in))) { } else if (RET_FAIL( @@ -857,10 +993,10 @@ class FloatStatistic : public Statistic { } return ret; } - int merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_NUM_STAT_FROM(FloatStatistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_NUM_STAT_FROM(FloatStatistic, stat); } }; @@ -892,7 +1028,7 @@ class DoubleStatistic : public Statistic { last_value_ = that.last_value_; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; sum_value_ = 0; min_value_ = 0; @@ -900,13 +1036,64 @@ class DoubleStatistic : public Statistic { first_value_ = 0; last_value_ = 0; } - FORCE_INLINE void update(int64_t time, double value) { + FORCE_INLINE void update(int64_t time, double value) override { NUM_STAT_UPDATE(time, value); } - FORCE_INLINE common::TSDataType get_type() { return common::DOUBLE; } + void update_batch(const int64_t* timestamps, const double* values, + uint32_t count) override { + if (count == 0) return; + uint32_t start = 0; + if (count_ == 0) { + start_time_ = timestamps[0]; + end_time_ = timestamps[0]; + first_value_ = values[0]; + last_value_ = values[0]; + min_value_ = values[0]; + max_value_ = values[0]; + sum_value_ = values[0]; + count_ = 1; + start = 1; + } + if (count > start) { + if (timestamps[start] < start_time_) + start_time_ = timestamps[start]; + if (timestamps[count - 1] > end_time_) + end_time_ = timestamps[count - 1]; + } + uint32_t i = start; +#if TSFILE_HAS_NEON + { + float64x2_t vmin = vdupq_n_f64(min_value_); + float64x2_t vmax = vdupq_n_f64(max_value_); + float64x2_t vsum = 
vdupq_n_f64(0.0); + for (; i + 2 <= count; i += 2) { + float64x2_t v = vld1q_f64(&values[i]); + vmin = vbslq_f64(vcltq_f64(v, vmin), v, vmin); + vmax = vbslq_f64(vcgtq_f64(v, vmax), v, vmax); + vsum = vaddq_f64(vsum, v); + } + min_value_ = + std::min(vgetq_lane_f64(vmin, 0), vgetq_lane_f64(vmin, 1)); + max_value_ = + std::max(vgetq_lane_f64(vmax, 0), vgetq_lane_f64(vmax, 1)); + sum_value_ += vgetq_lane_f64(vsum, 0) + vgetq_lane_f64(vsum, 1); + } +#endif + for (; i < count; i++) { + if (values[i] < min_value_) min_value_ = values[i]; + if (values[i] > max_value_) max_value_ = values[i]; + sum_value_ += values[i]; + } + last_value_ = values[count - 1]; + count_ += (count - start); + } + + FORCE_INLINE common::TSDataType get_type() override { + return common::DOUBLE; + } - int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL( common::SerializationUtil::write_double(min_value_, out))) { @@ -921,7 +1108,7 @@ class DoubleStatistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_double(min_value_, in))) { } else if (RET_FAIL(common::SerializationUtil::read_double(max_value_, @@ -935,10 +1122,10 @@ class DoubleStatistic : public Statistic { } return ret; } - int merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_NUM_STAT_FROM(DoubleStatistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_NUM_STAT_FROM(DoubleStatistic, stat); } }; @@ -960,30 +1147,50 @@ class TimeStatistic : public Statistic { end_time_ = that.end_time_; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; start_time_ = 0; end_time_ = 0; } - FORCE_INLINE void update(int64_t time) { + FORCE_INLINE void update(int64_t time) override {
TIME_STAT_UPDATE((time)); count_++; } - FORCE_INLINE common::TSDataType get_type() { return common::VECTOR; } + void update_time_batch(const int64_t* timestamps, uint32_t count) override { + if (count == 0) return; + if (count_ == 0) { + start_time_ = timestamps[0]; + end_time_ = timestamps[0]; + } + // Timestamps are already verified monotonic in TimePageWriter, + // so first element is min candidate and last is max candidate. + if (timestamps[0] < start_time_) start_time_ = timestamps[0]; + if (timestamps[count - 1] > end_time_) + end_time_ = timestamps[count - 1]; + count_ += count; + } - int serialize_typed_stat(common::ByteStream& out) { return common::E_OK; } - int deserialize_typed_stat(common::ByteStream& in) { return common::E_OK; } - int merge_with(Statistic* stat) { + FORCE_INLINE common::TSDataType get_type() override { + return common::VECTOR; + } + + int serialize_typed_stat(common::ByteStream& out) override { + return common::E_OK; + } + int deserialize_typed_stat(common::ByteStream& in) override { + return common::E_OK; + } + int merge_with(Statistic* stat) override { MERGE_TIME_STAT_FROM(TimeStatistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_TIME_STAT_FROM(TimeStatistic, stat); } - std::string to_string() const { + std::string to_string() const override { std::ostringstream oss; oss << "{count=" << count_ << ", start_time=" << start_time_ << ", end_time=" << end_time_ << "}"; @@ -992,7 +1199,9 @@ class TimeStatistic : public Statistic { }; class TimestampStatistics : public Int64Statistic { - FORCE_INLINE common::TSDataType get_type() { return common::TIMESTAMP; } + FORCE_INLINE common::TSDataType get_type() override { + return common::TIMESTAMP; + } }; class StringStatistic : public Statistic { @@ -1002,35 +1211,24 @@ class StringStatistic : public Statistic { common::String first_value_; common::String last_value_; StringStatistic() - : min_value_(), - max_value_(), - 
first_value_(), - last_value_(), - pa_(nullptr), - owns_pa_(true) { + : min_value_(), max_value_(), first_value_(), last_value_() { pa_ = new common::PageArena(); pa_->init(512, common::MOD_STATISTIC_OBJ); } StringStatistic(common::PageArena* pa) - : min_value_(), - max_value_(), - first_value_(), - last_value_(), - pa_(pa), - owns_pa_(false) {} + : min_value_(), max_value_(), first_value_(), last_value_(), pa_(pa) {} ~StringStatistic() { destroy(); } - void destroy() { - if (owns_pa_ && pa_) { + void destroy() override { + if (pa_) { delete pa_; pa_ = nullptr; } - owns_pa_ = false; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; start_time_ = 0; end_time_ = 0; @@ -1050,13 +1248,15 @@ class StringStatistic : public Statistic { last_value_.dup_from(that.last_value_, *pa_); } - FORCE_INLINE void update(int64_t time, common::String value) { + FORCE_INLINE void update(int64_t time, common::String value) override { STRING_STAT_UPDATE(time, value); } - FORCE_INLINE common::TSDataType get_type() { return common::STRING; } + FORCE_INLINE common::TSDataType get_type() override { + return common::STRING; + } - int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_str(first_value_, out))) { } else if (RET_FAIL(common::SerializationUtil::write_str(last_value_, @@ -1068,7 +1268,7 @@ class StringStatistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL( common::SerializationUtil::read_str(first_value_, pa_, in))) { @@ -1081,42 +1281,39 @@ class StringStatistic : public Statistic { } return ret; } - int merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_STRING_STAT_FROM(StringStatistic, stat); } - int deep_copy_from(Statistic* stat) { + int 
deep_copy_from(Statistic* stat) override { DEEP_COPY_STRING_STAT_FROM(StringStatistic, stat); } private: common::PageArena* pa_; - bool owns_pa_; }; class TextStatistic : public Statistic { public: common::String first_value_; common::String last_value_; - TextStatistic() - : first_value_(), last_value_(), pa_(nullptr), owns_pa_(true) { + TextStatistic() : first_value_(), last_value_() { pa_ = new common::PageArena(); pa_->init(512, common::MOD_STATISTIC_OBJ); } TextStatistic(common::PageArena* pa) - : first_value_(), last_value_(), pa_(pa), owns_pa_(false) {} + : first_value_(), last_value_(), pa_(pa) {} ~TextStatistic() { destroy(); } - void destroy() { - if (owns_pa_ && pa_) { + void destroy() override { + if (pa_) { delete pa_; pa_ = nullptr; } - owns_pa_ = false; } - FORCE_INLINE void reset() { + FORCE_INLINE void reset() override { count_ = 0; start_time_ = 0; end_time_ = 0; @@ -1132,13 +1329,13 @@ class TextStatistic : public Statistic { last_value_.dup_from(that.last_value_, *pa_); } - FORCE_INLINE void update(int64_t time, common::String value) { + FORCE_INLINE void update(int64_t time, common::String value) override { TEXT_STAT_UPDATE(time, value); } - FORCE_INLINE common::TSDataType get_type() { return common::TEXT; } + FORCE_INLINE common::TSDataType get_type() override { return common::TEXT; } - int serialize_typed_stat(common::ByteStream& out) { + int serialize_typed_stat(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_str(first_value_, out))) { } else if (RET_FAIL(common::SerializationUtil::write_str(last_value_, @@ -1146,7 +1343,7 @@ class TextStatistic : public Statistic { } return ret; } - int deserialize_typed_stat(common::ByteStream& in) { + int deserialize_typed_stat(common::ByteStream& in) override { int ret = common::E_OK; if (RET_FAIL( common::SerializationUtil::read_str(first_value_, pa_, in))) { @@ -1155,35 +1352,33 @@ class TextStatistic : public Statistic { } return ret; } - int 
merge_with(Statistic* stat) { + int merge_with(Statistic* stat) override { MERGE_TEXT_STAT_FROM(TextStatistic, stat); } - int deep_copy_from(Statistic* stat) { + int deep_copy_from(Statistic* stat) override { DEEP_COPY_TEXT_STAT_FROM(TextStatistic, stat); } private: common::PageArena* pa_; - bool owns_pa_; }; class BlobStatistic : public Statistic { public: - BlobStatistic() : pa_(nullptr), owns_pa_(true) { + BlobStatistic() { pa_ = new common::PageArena(); pa_->init(512, common::MOD_STATISTIC_OBJ); } - BlobStatistic(common::PageArena* pa) : pa_(pa), owns_pa_(false) {} + BlobStatistic(common::PageArena* pa) : pa_(pa) {} ~BlobStatistic() { destroy(); } void destroy() { - if (owns_pa_ && pa_) { + if (pa_) { delete pa_; pa_ = nullptr; } - owns_pa_ = false; } FORCE_INLINE void reset() { @@ -1214,7 +1409,6 @@ class BlobStatistic : public Statistic { private: common::PageArena* pa_; - bool owns_pa_; }; FORCE_INLINE uint32_t get_typed_statistic_sizeof(common::TSDataType type) { diff --git a/cpp/src/common/tablet.cc b/cpp/src/common/tablet.cc index d71e48384..014b3931e 100644 --- a/cpp/src/common/tablet.cc +++ b/cpp/src/common/tablet.cc @@ -98,14 +98,8 @@ int Tablet::init() { case BLOB: case TEXT: case STRING: { - auto* sc = static_cast(common::mem_alloc( - sizeof(StringColumn), common::MOD_TABLET)); - if (sc == nullptr) return E_OOM; - new (sc) StringColumn(); - // 8 bytes/row is a conservative initial estimate for short - // string columns (e.g. device IDs, tags). The buffer grows - // automatically on demand via mem_realloc.
- sc->init(max_row_num_, max_row_num_ * 8); + auto* sc = new StringColumn(); + sc->init(max_row_num_, max_row_num_ * 32); value_matrix_[c].string_col = sc; break; } @@ -120,8 +114,9 @@ int Tablet::init() { if (bitmaps_ == nullptr) return E_OOM; for (size_t c = 0; c < schema_count; c++) { new (&bitmaps_[c]) BitMap(); - bitmaps_[c].init(max_row_num_, false); + bitmaps_[c].init(max_row_num_, false, common::MOD_TABLET); } + return E_OK; } @@ -156,7 +151,7 @@ void Tablet::destroy() { case TEXT: case STRING: value_matrix_[c].string_col->destroy(); - common::mem_free(value_matrix_[c].string_col); + delete value_matrix_[c].string_col; break; default: break; @@ -192,9 +187,7 @@ int Tablet::add_timestamp(uint32_t row_index, int64_t timestamp) { } int Tablet::set_timestamps(const int64_t* timestamps, uint32_t count) { - if (err_code_ != E_OK) { - return err_code_; - } + if (err_code_ != E_OK) return err_code_; ASSERT(timestamps_ != NULL); if (UNLIKELY(count > static_cast(max_row_num_))) { return E_OUT_OF_RANGE; @@ -206,15 +199,10 @@ int Tablet::set_timestamps(const int64_t* timestamps, uint32_t count) { int Tablet::set_column_values(uint32_t schema_index, const void* data, const uint8_t* bitmap, uint32_t count) { - if (err_code_ != E_OK) { - return err_code_; - } - if (UNLIKELY(schema_index >= schema_vec_->size())) { + if (err_code_ != E_OK) return err_code_; + if (UNLIKELY(schema_index >= schema_vec_->size())) return E_OUT_OF_RANGE; + if (UNLIKELY(count > static_cast(max_row_num_))) return E_OUT_OF_RANGE; - } - if (UNLIKELY(count > static_cast(max_row_num_))) { - return E_OUT_OF_RANGE; - } const MeasurementSchema& schema = schema_vec_->at(schema_index); size_t elem_size = 0; @@ -258,47 +246,40 @@ int Tablet::set_column_values(uint32_t schema_index, const void* data, return E_OK; } -int Tablet::set_column_string_values(uint32_t schema_index, - const int32_t* offsets, const char* data, - const uint8_t* bitmap, uint32_t count) { - if (err_code_ != E_OK) { - return err_code_; - 
} - if (UNLIKELY(schema_index >= schema_vec_->size())) { - return E_OUT_OF_RANGE; - } - if (UNLIKELY(count > static_cast(max_row_num_))) { +int Tablet::set_column_string_repeated(uint32_t schema_index, const char* str, + uint32_t str_len, uint32_t count) { + if (err_code_ != E_OK) return err_code_; + if (UNLIKELY(schema_index >= schema_vec_->size())) return E_OUT_OF_RANGE; + if (UNLIKELY(count > static_cast(max_row_num_))) return E_OUT_OF_RANGE; - } StringColumn* sc = value_matrix_[schema_index].string_col; - if (sc == nullptr) { - return E_INVALID_ARG; - } + if (sc == nullptr) return E_INVALID_ARG; - uint32_t total_bytes = static_cast(offsets[count]); + uint32_t total_bytes = str_len * count; if (total_bytes > sc->buf_capacity) { sc->buf_capacity = total_bytes; sc->buffer = (char*)mem_realloc(sc->buffer, sc->buf_capacity); } - if (total_bytes > 0) { - std::memcpy(sc->buffer, data, total_bytes); + for (uint32_t i = 0; i < count; i++) { + sc->offsets[i] = i * str_len; + memcpy(sc->buffer + i * str_len, str, str_len); } - std::memcpy(sc->offsets, offsets, (count + 1) * sizeof(int32_t)); + sc->offsets[count] = total_bytes; sc->buf_used = total_bytes; - if (bitmap == nullptr) { - bitmaps_[schema_index].clear_all(); - } else { - char* tsfile_bm = bitmaps_[schema_index].get_bitmap(); - uint32_t bm_bytes = (count + 7) / 8; - std::memcpy(tsfile_bm, bitmap, bm_bytes); - } + bitmaps_[schema_index].clear_all(); cur_row_size_ = std::max(count, cur_row_size_); return E_OK; } +void Tablet::reset(uint32_t row_count) { + ASSERT(row_count <= max_row_num_); + cur_row_size_ = row_count; + reset_string_columns(); +} + void* Tablet::get_value(int row_index, uint32_t schema_index, common::TSDataType& data_type) const { if (UNLIKELY(schema_index >= schema_vec_->size())) { @@ -332,8 +313,6 @@ void* Tablet::get_value(int row_index, uint32_t schema_index, double* double_values = column_values.double_data; return &double_values[row_index]; } - case TEXT: - case BLOB: case STRING: { return 
&column_values.string_col->get_string_view(row_index); } @@ -502,75 +481,41 @@ void Tablet::reset_string_columns() { } } -// Find all row indices where the device ID changes. A device ID is the -// composite key formed by all id columns (e.g. region + sensor_id). Row i -// is a boundary when at least one id column differs between row i-1 and row i. -// -// Example (2 id columns: region, sensor_id): -// row 0: "A", "s1" -// row 1: "A", "s2" <- boundary: sensor_id changed -// row 2: "B", "s1" <- boundary: region changed -// row 3: "B", "s1" -// row 4: "B", "s2" <- boundary: sensor_id changed -// result: [1, 2, 4] -// -// Boundaries are computed in one shot at flush time rather than maintained -// incrementally during add_value / set_column_*. The total work is similar -// either way, but batch computation here is far more CPU-friendly: the inner -// loop is a tight memcmp scan over contiguous buffers with good cache -// locality, and the CPU can pipeline comparisons without the branch overhead -// and cache thrashing of per-row bookkeeping spread across the write path. std::vector Tablet::find_all_device_boundaries() const { const uint32_t row_count = get_cur_row_size(); if (row_count <= 1) return {}; + // Use uint64_t bitmap instead of vector for faster set/test/scan. 
const uint32_t nwords = (row_count + 63) / 64; std::vector boundary(nwords, 0); - uint32_t boundary_count = 0; - const uint32_t max_boundaries = row_count - 1; - for (auto it = id_column_indexes_.rbegin(); it != id_column_indexes_.rend(); - ++it) { - const StringColumn& sc = *value_matrix_[*it].string_col; - const int32_t* off = sc.offsets; + for (auto col_idx : id_column_indexes_) { + const StringColumn& sc = *value_matrix_[col_idx].string_col; + const uint32_t* off = sc.offsets; const char* buf = sc.buffer; for (uint32_t i = 1; i < row_count; i++) { if (boundary[i >> 6] & (1ULL << (i & 63))) continue; - int32_t len_a = off[i] - off[i - 1]; - int32_t len_b = off[i + 1] - off[i]; + uint32_t len_a = off[i] - off[i - 1]; + uint32_t len_b = off[i + 1] - off[i]; if (len_a != len_b || - (len_a > 0 && memcmp(buf + off[i - 1], buf + off[i], - static_cast(len_a)) != 0)) { + (len_a > 0 && + memcmp(buf + off[i - 1], buf + off[i], len_a) != 0)) { boundary[i >> 6] |= (1ULL << (i & 63)); - if (++boundary_count >= max_boundaries) break; } } - if (boundary_count >= max_boundaries) break; } - // Sweep the bitmap word by word, extracting set bit positions in order. - // Each word covers 64 consecutive rows: word w covers rows [w*64, w*64+63]. 
- // - // For each word we use two standard bit tricks: - // __builtin_ctzll(bits) — count trailing zeros = index of lowest set bit - // bits &= bits - 1 — clear the lowest set bit - // - // Example: w=1, bits=0b...00010100 (bits 2 and 4 set) - // iter 1: ctzll=2 → idx=1*64+2=66, bits becomes 0b...00010000 - // iter 2: ctzll=4 → idx=1*64+4=68, bits becomes 0b...00000000 → exit - // - // Guards: idx>0 because row 0 can never be a boundary (no predecessor); - // idx result; for (uint32_t w = 0; w < nwords; w++) { uint64_t bits = boundary[w]; while (bits) { - uint32_t bit = bitops::ctz64_nonzero(bits); + uint32_t bit = __builtin_ctzll(bits); uint32_t idx = w * 64 + bit; if (idx > 0 && idx < row_count) { result.push_back(idx); } - bits &= bits - 1; + bits &= bits - 1; // clear lowest set bit } } return result; @@ -609,4 +554,4 @@ std::shared_ptr Tablet::get_device_id(int i) const { return res; } -} // end namespace storage \ No newline at end of file +} // end namespace storage diff --git a/cpp/src/common/tablet.h b/cpp/src/common/tablet.h index 799d6b7cc..418f8ca73 100644 --- a/cpp/src/common/tablet.h +++ b/cpp/src/common/tablet.h @@ -22,7 +22,6 @@ #include #include -#include #include #include "common/config/config.h" @@ -47,11 +46,10 @@ class TabletColIterator; * with their associated metadata such as column names and types. */ class Tablet { - public: // Arrow-style string column: offsets + contiguous buffer. 
// string[i] = buffer + offsets[i], len = offsets[i+1] - offsets[i] struct StringColumn { - int32_t* offsets; // length: max_rows + 1 (Arrow-compatible) + uint32_t* offsets; // length: max_rows + 1 char* buffer; // contiguous string data uint32_t buf_capacity; // allocated buffer size uint32_t buf_used; // bytes written so far @@ -60,12 +58,11 @@ class Tablet { : offsets(nullptr), buffer(nullptr), buf_capacity(0), buf_used(0) {} void init(uint32_t max_rows, uint32_t init_buf_capacity) { - offsets = (int32_t*)common::mem_alloc( - sizeof(int32_t) * (max_rows + 1), common::MOD_DEFAULT); + offsets = (uint32_t*)common::mem_alloc( + sizeof(uint32_t) * (max_rows + 1), common::MOD_TABLET); offsets[0] = 0; buf_capacity = init_buf_capacity; - buffer = - (char*)common::mem_alloc(buf_capacity, common::MOD_DEFAULT); + buffer = (char*)common::mem_alloc(buf_capacity, common::MOD_TABLET); buf_used = 0; } @@ -89,8 +86,8 @@ class Tablet { buffer = (char*)common::mem_realloc(buffer, buf_capacity); } memcpy(buffer + buf_used, data, len); - offsets[row] = static_cast(buf_used); - offsets[row + 1] = static_cast(buf_used + len); + offsets[row] = buf_used; + offsets[row + 1] = buf_used + len; buf_used += len; } @@ -98,14 +95,13 @@ class Tablet { return buffer + offsets[row]; } uint32_t get_len(uint32_t row) const { - return static_cast(offsets[row + 1] - offsets[row]); + return offsets[row + 1] - offsets[row]; } // Return a String view for a given row. The returned reference is // valid until the next call to get_string_view on this column. common::String& get_string_view(uint32_t row) { view_cache_.buf_ = buffer + offsets[row]; - view_cache_.len_ = - static_cast(offsets[row + 1] - offsets[row]); + view_cache_.len_ = offsets[row + 1] - offsets[row]; return view_cache_; } @@ -231,64 +227,12 @@ class Tablet { ~Tablet() { destroy(); } - // Tablet owns raw heap buffers (timestamps_, value_matrix_, bitmaps_) that - // destroy() frees. 
The implicitly generated copy operations would shallow- - // copy those pointers, causing double-free / use-after-free, so copying is - // disabled. Move transfers ownership and leaves the source empty (its - // pointers nulled) so the moved-from object destructs harmlessly. - Tablet(const Tablet&) = delete; - Tablet& operator=(const Tablet&) = delete; - - Tablet(Tablet&& other) noexcept - : err_code_(other.err_code_), - max_row_num_(other.max_row_num_), - cur_row_size_(other.cur_row_size_), - insert_target_name_(std::move(other.insert_target_name_)), - schema_vec_(std::move(other.schema_vec_)), - schema_map_(std::move(other.schema_map_)), - timestamps_(other.timestamps_), - value_matrix_(other.value_matrix_), - bitmaps_(other.bitmaps_), - column_categories_(std::move(other.column_categories_)), - id_column_indexes_(std::move(other.id_column_indexes_)) { - other.timestamps_ = nullptr; - other.value_matrix_ = nullptr; - other.bitmaps_ = nullptr; - } - - Tablet& operator=(Tablet&& other) noexcept { - if (this != &other) { - destroy(); - err_code_ = other.err_code_; - max_row_num_ = other.max_row_num_; - cur_row_size_ = other.cur_row_size_; - insert_target_name_ = std::move(other.insert_target_name_); - schema_vec_ = std::move(other.schema_vec_); - schema_map_ = std::move(other.schema_map_); - timestamps_ = other.timestamps_; - value_matrix_ = other.value_matrix_; - bitmaps_ = other.bitmaps_; - column_categories_ = std::move(other.column_categories_); - id_column_indexes_ = std::move(other.id_column_indexes_); - other.timestamps_ = nullptr; - other.value_matrix_ = nullptr; - other.bitmaps_ = nullptr; - } - return *this; - } - const std::string& get_table_name() const { return insert_target_name_; } void set_table_name(const std::string& table_name) { insert_target_name_ = table_name; } size_t get_column_count() const { return schema_vec_->size(); } uint32_t get_cur_row_size() const { return cur_row_size_; } - int64_t get_timestamp(uint32_t row_index) const { - return 
timestamps_[row_index]; - } - bool is_null(uint32_t row_index, uint32_t col_index) const { - return bitmaps_[col_index].test(row_index); - } /** * @brief Adds a timestamp to the specified row. @@ -300,25 +244,21 @@ class Tablet { */ int add_timestamp(uint32_t row_index, int64_t timestamp); - /** - * @brief Bulk copy timestamps into the tablet. - * - * @param timestamps Pointer to an array of timestamp values. - * @param count Number of timestamps to copy. Must be <= max_row_num. - * If count > cur_row_size_, cur_row_size_ is updated to count, - * so that subsequent operations know how many rows are populated. - * @return Returns 0 on success, or a non-zero error code on failure - * (E_OUT_OF_RANGE if count > max_row_num). - */ int set_timestamps(const int64_t* timestamps, uint32_t count); - // Bulk copy fixed-length column data. If bitmap is nullptr, all rows are - // non-null. Otherwise bit=1 means null, bit=0 means valid (same as TsFile - // BitMap convention). Callers using other conventions (e.g. Arrow, where - // 1=valid) must invert before calling. + // Bulk copy fixed-length column data. bitmap=nullptr means all non-null. + // bitmap uses TsFile convention: bit=1 is null, bit=0 is valid. int set_column_values(uint32_t schema_index, const void* data, const uint8_t* bitmap, uint32_t count); + // Bulk fill a STRING column with the same value for all rows. + int set_column_string_repeated(uint32_t schema_index, const char* str, + uint32_t str_len, uint32_t count); + + // Reset per-batch state so the tablet can be reused without reallocating + // its backing buffers. row_count is typically 0 before refilling. + void reset(uint32_t row_count = 0); + void* get_value(int row_index, uint32_t schema_index, common::TSDataType& data_type) const; /** @@ -341,14 +281,10 @@ class Tablet { std::shared_ptr get_device_id(int i) const; std::vector find_all_device_boundaries() const; - // Bulk copy string column data (offsets + data buffer). 
- // offsets has count+1 entries and must start from 0 (offsets[0] == 0). - // bitmap follows TsFile convention (bit=1 means null, nullptr means all - // valid). Callers using Arrow convention (bit=1 means valid) must invert - // before calling. - int set_column_string_values(uint32_t schema_index, const int32_t* offsets, - const char* data, const uint8_t* bitmap, - uint32_t count); + // When the caller guarantees that all rows belong to a single device, + // set this flag to skip the O(n*m) boundary detection in the write path. + void set_single_device(bool v) { single_device_ = v; } + bool is_single_device() const { return single_device_; } /** * @brief Template function to add a value of type T to the specified row * and column by name. @@ -406,6 +342,7 @@ class Tablet { common::BitMap* bitmaps_; std::vector column_categories_; std::vector id_column_indexes_; + bool single_device_ = false; }; } // end namespace storage diff --git a/cpp/src/common/thread_pool.h b/cpp/src/common/thread_pool.h index f82aea038..9285d4ff4 100644 --- a/cpp/src/common/thread_pool.h +++ b/cpp/src/common/thread_pool.h @@ -27,7 +27,6 @@ #include #include #include -#include #include namespace common { diff --git a/cpp/src/common/tsblock/tsblock.h b/cpp/src/common/tsblock/tsblock.h index 859ad393d..80869ec41 100644 --- a/cpp/src/common/tsblock/tsblock.h +++ b/cpp/src/common/tsblock/tsblock.h @@ -144,6 +144,12 @@ class RowAppender { ASSERT(tsblock_->row_count_ > 0); tsblock_->row_count_--; } + FORCE_INLINE uint32_t remaining() const { + return tsblock_->max_row_count_ - tsblock_->row_count_; + } + FORCE_INLINE void add_rows(uint32_t count) { + tsblock_->row_count_ += count; + } FORCE_INLINE void append(uint32_t slot_index, const char* value, uint32_t len) { @@ -222,6 +228,19 @@ class ColAppender { } FORCE_INLINE void reset() { column_row_count_ = 0; } + FORCE_INLINE void bulk_append_fixed(const char* data, uint32_t count, + uint32_t elem_size) { + 
vec_->get_value_data().append_fixed_value(data, count * elem_size); + vec_->add_row_nums(count); + column_row_count_ += count; + } + + FORCE_INLINE uint32_t get_column_row_count() const { + return column_row_count_; + } + + FORCE_INLINE Vector* get_vector() { return vec_; } + private: uint32_t column_index_; uint32_t column_row_count_; @@ -252,16 +271,14 @@ class RowIterator { FORCE_INLINE void next() { ASSERT(row_id_ < tsblock_->row_count_); ++row_id_; + const uint32_t current_row_id = row_id_ - 1; for (uint32_t i = 0; i < column_count_; ++i) { - tsblock_->vectors_[i]->update_offset(); + if (!tsblock_->vectors_[i]->is_null(current_row_id)) { + tsblock_->vectors_[i]->update_offset(); + } } } - FORCE_INLINE void next(size_t ind) const { - ASSERT(row_id_ < tsblock_->row_count_); - tsblock_->vectors_[ind]->update_offset(); - } - FORCE_INLINE void update_row_id() { row_id_++; } FORCE_INLINE char* read(uint32_t column_index, uint32_t* __restrict len, @@ -311,6 +328,23 @@ class ColIterator { FORCE_INLINE uint32_t get_column_index() { return column_index_; } + FORCE_INLINE uint32_t remaining() const { + return tsblock_->row_count_ - row_id_; + } + FORCE_INLINE char* data_ptr() { + return vec_->get_value_data().get_data() + vec_->get_offset(); + } + FORCE_INLINE void advance(uint32_t n, uint32_t elem_size) { + row_id_ += n; + vec_->advance_offset(n * elem_size); + } + + FORCE_INLINE void advance_row_only(uint32_t n) { row_id_ += n; } + + FORCE_INLINE uint32_t get_row_id() const { return row_id_; } + + FORCE_INLINE Vector* get_vector() { return vec_; } + private: uint32_t column_index_; uint32_t row_id_; diff --git a/cpp/src/common/tsblock/vector/vector.h b/cpp/src/common/tsblock/vector/vector.h index 37a96c543..dde3e76cc 100644 --- a/cpp/src/common/tsblock/vector/vector.h +++ b/cpp/src/common/tsblock/vector/vector.h @@ -73,6 +73,9 @@ class Vector { FORCE_INLINE uint32_t get_row_num() { return row_num_; } FORCE_INLINE void add_row_num() { row_num_++; } + FORCE_INLINE void 
add_row_nums(uint32_t n) { row_num_ += n; } + FORCE_INLINE uint32_t get_offset() const { return offset_; } + FORCE_INLINE void advance_offset(uint32_t bytes) { offset_ += bytes; } FORCE_INLINE common::TsBlock* get_tsblock() { return tsblock_; } diff --git a/cpp/src/common/tsfile_common.cc b/cpp/src/common/tsfile_common.cc index a3fcc0a70..7d79b90e8 100644 --- a/cpp/src/common/tsfile_common.cc +++ b/cpp/src/common/tsfile_common.cc @@ -103,13 +103,8 @@ int TSMIterator::init() { chunk_meta_iter_++; } if (!tmp.empty()) { - auto& merged = - tsm_chunk_meta_info_[chunk_group_meta_iter_.get()->device_id_]; - for (auto& m_entry : tmp) { - auto& vec = merged[m_entry.first]; - vec.insert(vec.end(), m_entry.second.begin(), - m_entry.second.end()); - } + tsm_chunk_meta_info_[chunk_group_meta_iter_.get()->device_id_] = + tmp; } chunk_group_meta_iter_++; diff --git a/cpp/src/common/tsfile_common.h b/cpp/src/common/tsfile_common.h index b516b608f..52fe4ef65 100644 --- a/cpp/src/common/tsfile_common.h +++ b/cpp/src/common/tsfile_common.h @@ -41,14 +41,14 @@ namespace storage { -extern TSFILE_API const char* MAGIC_STRING_TSFILE; +extern const char* MAGIC_STRING_TSFILE; constexpr int MAGIC_STRING_TSFILE_LEN = 6; -extern TSFILE_API const char VERSION_NUM_BYTE; -extern TSFILE_API const char CHUNK_GROUP_HEADER_MARKER; -extern TSFILE_API const char CHUNK_HEADER_MARKER; -extern TSFILE_API const char ONLY_ONE_PAGE_CHUNK_HEADER_MARKER; -extern TSFILE_API const char SEPARATOR_MARKER; -extern TSFILE_API const char OPERATION_INDEX_RANGE; +extern const char VERSION_NUM_BYTE; +extern const char CHUNK_GROUP_HEADER_MARKER; +extern const char CHUNK_HEADER_MARKER; +extern const char ONLY_ONE_PAGE_CHUNK_HEADER_MARKER; +extern const char SEPARATOR_MARKER; +extern const char OPERATION_INDEX_RANGE; // TODO review the String.len_ used @@ -314,6 +314,11 @@ class ITimeseriesIndex { virtual common::SimpleList* get_value_chunk_meta_list() const { return nullptr; } + virtual uint32_t get_value_column_count() 
const { return 1; } + virtual common::SimpleList* get_value_chunk_meta_list( + uint32_t col_index) const { + return col_index == 0 ? get_value_chunk_meta_list() : nullptr; + } virtual common::String get_measurement_name() const { return common::String(); @@ -321,7 +326,6 @@ class ITimeseriesIndex { virtual common::TSDataType get_data_type() const { return common::INVALID_DATATYPE; } - virtual bool is_aligned() const { return false; } virtual Statistic* get_statistic() const { return nullptr; } }; @@ -590,10 +594,8 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex { return value_ts_idx_->get_measurement_name(); } virtual common::TSDataType get_data_type() const { - return value_ts_idx_ == nullptr ? common::INVALID_DATATYPE - : value_ts_idx_->get_data_type(); + return time_ts_idx_->get_data_type(); } - virtual bool is_aligned() const { return true; } virtual Statistic* get_statistic() const { return value_ts_idx_->get_statistic(); } @@ -608,6 +610,47 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex { #endif }; +class MultiAlignedTimeseriesIndex : public ITimeseriesIndex { + public: + TimeseriesIndex* time_ts_idx_ = nullptr; + std::vector value_ts_idxs_; + + MultiAlignedTimeseriesIndex() {} + ~MultiAlignedTimeseriesIndex() {} + + common::SimpleList* get_time_chunk_meta_list() const override { + return time_ts_idx_ ? time_ts_idx_->get_chunk_meta_list() : nullptr; + } + common::SimpleList* get_value_chunk_meta_list() const override { + return value_ts_idxs_.empty() + ? nullptr + : value_ts_idxs_[0]->get_chunk_meta_list(); + } + uint32_t get_value_column_count() const override { + return value_ts_idxs_.size(); + } + common::SimpleList* get_value_chunk_meta_list( + uint32_t col_index) const override { + return col_index < value_ts_idxs_.size() + ? value_ts_idxs_[col_index]->get_chunk_meta_list() + : nullptr; + } + common::String get_measurement_name() const override { + return value_ts_idxs_.empty() + ? 
common::String() + : value_ts_idxs_[0]->get_measurement_name(); + } + common::TSDataType get_data_type() const override { + return time_ts_idx_ ? time_ts_idx_->get_data_type() + : common::INVALID_DATATYPE; + } + Statistic* get_statistic() const override { return nullptr; } + + const std::vector& get_value_indices() const { + return value_ts_idxs_; + } +}; + class TSMIterator { public: explicit TSMIterator( @@ -631,14 +674,13 @@ class TSMIterator { // timeseries measurenemnt chunk meta info // map >> std::map, - std::map>, - IDeviceIDComparator> + std::map>> tsm_chunk_meta_info_; // device iterator std::map, - std::map>, - IDeviceIDComparator>::iterator tsm_device_iter_; + std::map>>::iterator + tsm_device_iter_; // measurement iterator std::map>::iterator diff --git a/cpp/src/compress/lz4_compressor.cc b/cpp/src/compress/lz4_compressor.cc index 88c64466f..f4aa2fb26 100644 --- a/cpp/src/compress/lz4_compressor.cc +++ b/cpp/src/compress/lz4_compressor.cc @@ -76,9 +76,13 @@ int LZ4Compressor::compress(char* uncompressed_buf, } void LZ4Compressor::after_compress(char* compressed_buf) { + // See SnappyCompressor::after_compress for the same reasoning: the member + // pointer can lag behind the caller-known buffer across page reuse. if (compressed_buf != nullptr) { - mem_free(compressed_buf_); - compressed_buf_ = nullptr; + mem_free(compressed_buf); + if (compressed_buf_ == compressed_buf) { + compressed_buf_ = nullptr; + } } } diff --git a/cpp/src/compress/snappy_compressor.cc b/cpp/src/compress/snappy_compressor.cc index 6a2735e7b..d35458b94 100644 --- a/cpp/src/compress/snappy_compressor.cc +++ b/cpp/src/compress/snappy_compressor.cc @@ -73,9 +73,16 @@ int SnappyCompressor::compress(char* uncompressed_buf, } void SnappyCompressor::after_compress(char* compressed_buf) { + // Free the buffer the caller is releasing, not whatever we last cached in + // compressed_buf_. The member is only kept so destroy() can clean up if + // after_compress is never called. 
When the same compressor is reused + // across pages, compressed_buf_ may point to a different (live) allocation + // or be null by the time the caller releases an earlier page's buffer. if (compressed_buf != nullptr) { - mem_free(compressed_buf_); - compressed_buf_ = nullptr; + mem_free(compressed_buf); + if (compressed_buf_ == compressed_buf) { + compressed_buf_ = nullptr; + } } } diff --git a/cpp/src/compress/uncompressed_compressor.h b/cpp/src/compress/uncompressed_compressor.h index c262837a8..50aa13fc3 100644 --- a/cpp/src/compress/uncompressed_compressor.h +++ b/cpp/src/compress/uncompressed_compressor.h @@ -26,13 +26,27 @@ namespace storage { class UncompressedCompressor : public Compressor { public: - UncompressedCompressor() {} - virtual ~UncompressedCompressor() {} + UncompressedCompressor() : uncompressed_buf_(nullptr) {} + virtual ~UncompressedCompressor() { + if (uncompressed_buf_ != nullptr) { + common::mem_free(uncompressed_buf_); + uncompressed_buf_ = nullptr; + } + } int reset(bool for_compress) { UNUSED(for_compress); + if (uncompressed_buf_ != nullptr) { + common::mem_free(uncompressed_buf_); + uncompressed_buf_ = nullptr; + } return common::E_OK; } - void destroy() {} + void destroy() { + if (uncompressed_buf_ != nullptr) { + common::mem_free(uncompressed_buf_); + uncompressed_buf_ = nullptr; + } + } int compress(char* uncompressed_buf, uint32_t uncompressed_buf_len, char*& compressed_buf, uint32_t& compressed_buf_len) { compressed_buf = uncompressed_buf; @@ -43,11 +57,26 @@ class UncompressedCompressor : public Compressor { int uncompress(char* compressed_buf, uint32_t compressed_buf_len, char*& uncompressed_buf, uint32_t& uncompressed_buf_len) { - uncompressed_buf = compressed_buf; + char* buf = static_cast( + common::mem_alloc(compressed_buf_len, common::MOD_COMPRESSOR_OBJ)); + if (buf == nullptr) { + return common::E_OOM; + } + memcpy(buf, compressed_buf, compressed_buf_len); + uncompressed_buf = buf; + uncompressed_buf_ = buf; 
uncompressed_buf_len = compressed_buf_len; return common::E_OK; } - void after_uncompress(char* uncompressed_buf) { UNUSED(uncompressed_buf); } + void after_uncompress(char* uncompressed_buf) { + if (uncompressed_buf != nullptr) { + common::mem_free(uncompressed_buf_); + uncompressed_buf_ = nullptr; + } + } + + private: + char* uncompressed_buf_; }; } // end namespace storage diff --git a/cpp/src/cwrapper/arrow_c.cc b/cpp/src/cwrapper/arrow_c.cc index 931c17de7..6f56cfc6a 100644 --- a/cpp/src/cwrapper/arrow_c.cc +++ b/cpp/src/cwrapper/arrow_c.cc @@ -714,43 +714,6 @@ int TsBlockToArrowStruct(common::TsBlock& tsblock, ArrowArray* out_array, return common::E_OK; } -// Allocate and return a TsFile null bitmap (bit=1=null) by inverting an Arrow -// validity bitmap (bit=1=valid). bit_offset is the Arrow array's offset field; -// bits [bit_offset, bit_offset+n_rows) are extracted and inverted. -// Returns nullptr if validity is nullptr (all rows valid, no allocation needed) -// or on OOM. Caller must mem_free the result. -// To distinguish OOM from "no validity": OOM only when validity!=nullptr && -// result==nullptr. -static uint8_t* InvertArrowBitmap(const uint8_t* validity, int64_t bit_offset, - uint32_t n_rows) { - if (validity == nullptr) { - return nullptr; - } - uint32_t bm_bytes = (n_rows + 7) / 8; - uint8_t* null_bm = - static_cast(common::mem_alloc(bm_bytes, common::MOD_TSBLOCK)); - if (null_bm == nullptr) { - return nullptr; - } - if (bit_offset == 0) { - // Fast path: byte-level invert when there is no bit misalignment. - for (uint32_t b = 0; b < bm_bytes; b++) { - null_bm[b] = ~validity[b]; - } - } else { - // Sliced array: extract one bit at a time starting at bit_offset. 
- std::memset(null_bm, 0, bm_bytes); - for (uint32_t i = 0; i < n_rows; i++) { - int64_t src = bit_offset + i; - uint8_t valid = (validity[src / 8] >> (src % 8)) & 1; - if (!valid) { - null_bm[i / 8] |= static_cast(1u << (i % 8)); - } - } - } - return null_bm; -} - // Check if Arrow row is valid (non-null) based on validity bitmap static bool ArrowIsValid(const ArrowArray* arr, int64_t row) { if (arr->null_count == 0 || arr->buffers[0] == nullptr) return true; @@ -851,13 +814,6 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array, const ArrowArray* col_arr = in_array->children[data_col_indices[ci]]; common::TSDataType dtype = read_modes[ci]; uint32_t tcol = static_cast(ci); - // ArrowArray::offset is non-zero when the array is a slice of a larger - // buffer — for example, when Python pandas/PyArrow passes a column that - // was created via slice(), take(), or filter() without a copy, or when - // RecordBatch::Slice() is used to split a batch. In those cases the - // underlying buffer starts at element 0 of the original allocation, so - // all buffer accesses (data, offsets, validity bitmap) must be shifted - // by `off` before reading the `length` visible elements. int64_t off = col_arr->offset; const uint8_t* validity = @@ -881,21 +837,26 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array, case common::INT64: case common::FLOAT: case common::DOUBLE: { - size_t elem_size = - (dtype == common::INT64 || dtype == common::DOUBLE) ? 
8 : 4; - const void* data = - static_cast(col_arr->buffers[1]) + - off * elem_size; - uint8_t* null_bm = InvertArrowBitmap( - validity, off, static_cast(n_rows)); - if (validity != nullptr && null_bm == nullptr) { - delete tablet; - return common::E_OOM; + // Invert Arrow bitmap (1=valid) to TsFile bitmap (1=null) + const uint8_t* null_bm = nullptr; + uint8_t* inverted_bm = nullptr; + if (validity != nullptr) { + uint32_t bm_bytes = (static_cast(n_rows) + 7) / 8; + inverted_bm = static_cast( + common::mem_alloc(bm_bytes, common::MOD_TSBLOCK)); + if (inverted_bm == nullptr) { + delete tablet; + return common::E_OOM; + } + for (uint32_t b = 0; b < bm_bytes; b++) { + inverted_bm[b] = ~validity[b]; + } + null_bm = inverted_bm; } - tablet->set_column_values(tcol, data, null_bm, + tablet->set_column_values(tcol, col_arr->buffers[1], null_bm, static_cast(n_rows)); - if (null_bm != nullptr) { - common::mem_free(null_bm); + if (inverted_bm != nullptr) { + common::mem_free(inverted_bm); } break; } @@ -916,45 +877,16 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array, case common::TEXT: case common::STRING: case common::BLOB: { - // set_column_string_values requires offsets[0] == 0. - // When off > 0 (sliced Arrow array), normalize here: shift - // offsets down by base and advance the data pointer - // accordingly. 
- const int32_t* raw_offsets = - static_cast(col_arr->buffers[1]) + off; - const char* raw_data = + const int32_t* offsets = + static_cast(col_arr->buffers[1]); + const char* data = static_cast(col_arr->buffers[2]); - uint32_t nrows = static_cast(n_rows); - const int32_t* offsets = raw_offsets; - const char* data = raw_data; - int32_t* norm_offsets = nullptr; - if (off > 0) { - int32_t base = raw_offsets[0]; - norm_offsets = static_cast(common::mem_alloc( - (nrows + 1) * sizeof(int32_t), common::MOD_TSBLOCK)); - if (norm_offsets == nullptr) { - delete tablet; - return common::E_OOM; - } - for (uint32_t i = 0; i <= nrows; i++) { - norm_offsets[i] = raw_offsets[i] - base; - } - offsets = norm_offsets; - data = raw_data + base; - } - uint8_t* null_bm = InvertArrowBitmap(validity, off, nrows); - if (validity != nullptr && null_bm == nullptr) { - common::mem_free(norm_offsets); - delete tablet; - return common::E_OOM; - } - tablet->set_column_string_values(tcol, offsets, data, null_bm, - nrows); - if (null_bm != nullptr) { - common::mem_free(null_bm); - } - if (norm_offsets != nullptr) { - common::mem_free(norm_offsets); + for (int64_t r = 0; r < n_rows; r++) { + if (!ArrowIsValid(col_arr, r)) continue; + int32_t start = offsets[off + r]; + int32_t len = offsets[off + r + 1] - start; + tablet->add_value(static_cast(r), tcol, + common::String(data + start, len)); } break; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 07b363aeb..338aa208c 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -21,21 +21,14 @@ #include #include -#include -#ifdef _WIN32 -#include -#else +#include #include -#endif +#include #include #include -#include -#include "common/device_id.h" -#include "common/statistic.h" #include "common/tablet.h" -#include "common/tsfile_common.h" #include "reader/filter/tag_filter.h" #include "reader/result_set.h" #include "reader/table_result_set.h" @@ -99,8 +92,10 @@ WriteFile 
write_file_new(const char* pathname, ERRNO* err_code) { int ret; init_tsfile_config(); - if (access(pathname, F_OK) == 0) { - *err_code = common::E_ALREADY_EXIST; + struct stat path_stat {}; + if (stat(pathname, &path_stat) == 0) { + *err_code = S_ISDIR(path_stat.st_mode) ? common::E_FILE_OPEN_ERR + : common::E_ALREADY_EXIST; return nullptr; } @@ -415,10 +410,11 @@ ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, return result_set; } -ResultSet tsfile_reader_query_table_by_row( - TsFileReader reader, const char* table_name, char** column_names, - int column_names_len, int offset, int limit, TagFilterHandle tag_filter, - int batch_size, ERRNO* err_code) { +ResultSet tsfile_reader_query_table_by_row(TsFileReader reader, + const char* table_name, + char** column_names, + int column_names_len, int offset, + int limit, ERRNO* err_code) { auto* r = static_cast(reader); storage::ResultSet* result_set = nullptr; @@ -431,17 +427,15 @@ ResultSet tsfile_reader_query_table_by_row( columns.emplace_back(name == nullptr ? "" : std::string(name)); } - *err_code = r->queryByRow( - table_name == nullptr ? "" : table_name, columns, offset, limit, - result_set, static_cast(tag_filter), batch_size); + *err_code = r->queryByRow(table_name == nullptr ? 
"" : table_name, columns, + offset, limit, result_set); return result_set; } ResultSet tsfile_query_table_batch(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, - TagFilterHandle tag_filter, int batch_size, - ERRNO* err_code) { + int batch_size, ERRNO* err_code) { auto* r = static_cast(reader); storage::ResultSet* table_result_set = nullptr; std::vector column_names; @@ -449,8 +443,7 @@ ResultSet tsfile_query_table_batch(TsFileReader reader, const char* table_name, column_names.emplace_back(columns[i]); } *err_code = r->query(table_name, column_names, start_time, end_time, - table_result_set, - static_cast(tag_filter), batch_size); + table_result_set, batch_size); return table_result_set; } @@ -706,676 +699,6 @@ DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, return device_schema; } -void tsfile_device_id_free_contents(DeviceID* d) { - if (d == nullptr) { - return; - } - free(d->path); - d->path = nullptr; - free(d->table_name); - d->table_name = nullptr; - if (d->segments != nullptr) { - for (uint32_t k = 0; k < d->segment_count; k++) { - free(d->segments[k]); - } - free(d->segments); - d->segments = nullptr; - } - d->segment_count = 0; -} - -namespace { - -char* dup_common_string_to_cstr(const common::String& s) { - if (s.buf_ == nullptr || s.len_ == 0) { - return strdup(""); - } - char* p = static_cast(malloc(static_cast(s.len_) + 1U)); - if (p == nullptr) { - return nullptr; - } - memcpy(p, s.buf_, static_cast(s.len_)); - p[s.len_] = '\0'; - return p; -} - -static TSDataType cpp_stat_type_to_c(common::TSDataType t) { - return static_cast(static_cast(t)); -} - -void free_timeseries_statistic_heap(TimeseriesStatistic* s) { - if (s == nullptr) { - return; - } - TsFileStatisticBase* b = tsfile_statistic_base(s); - if (!b->has_statistic) { - return; - } - switch (b->type) { - case TS_DATATYPE_STRING: - free(s->u.string_s.str_min); - s->u.string_s.str_min = nullptr; 
- free(s->u.string_s.str_max); - s->u.string_s.str_max = nullptr; - free(s->u.string_s.str_first); - s->u.string_s.str_first = nullptr; - free(s->u.string_s.str_last); - s->u.string_s.str_last = nullptr; - break; - case TS_DATATYPE_TEXT: - free(s->u.text_s.str_first); - s->u.text_s.str_first = nullptr; - free(s->u.text_s.str_last); - s->u.text_s.str_last = nullptr; - break; - default: - break; - } -} - -void clear_timeseries_statistic(TimeseriesStatistic* s) { - memset(s, 0, sizeof(*s)); - tsfile_statistic_base(s)->type = TS_DATATYPE_INVALID; -} - -/** - * Fills @p out from C++ Statistic. On allocation failure returns E_OOM and - * clears/frees any partial string fields in @p out. - */ -int fill_timeseries_statistic(storage::Statistic* st, - TimeseriesStatistic* out) { - clear_timeseries_statistic(out); - if (st == nullptr) { - return common::E_OK; - } - const common::TSDataType t = st->get_type(); - switch (t) { - case common::BOOLEAN: { - auto* bs = static_cast(st); - TsFileBoolStatistic* p = &out->u.bool_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::BOOLEAN); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = static_cast(bs->sum_value_); - p->first_bool = bs->first_value_; - p->last_bool = bs->last_value_; - break; - } - case common::INT32: { - auto* is = static_cast(st); - TsFileIntStatistic* p = &out->u.int_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::INT32); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = static_cast(is->sum_value_); - if (p->base.row_count > 0) { - p->min_int64 = static_cast(is->min_value_); - p->max_int64 = static_cast(is->max_value_); - p->first_int64 = static_cast(is->first_value_); - p->last_int64 = static_cast(is->last_value_); - } - break; - } - case common::DATE: { - auto* is = static_cast(st); - 
TsFileIntStatistic* p = &out->u.int_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::DATE); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = static_cast(is->sum_value_); - if (p->base.row_count > 0) { - p->min_int64 = static_cast(is->min_value_); - p->max_int64 = static_cast(is->max_value_); - p->first_int64 = static_cast(is->first_value_); - p->last_int64 = static_cast(is->last_value_); - } - break; - } - case common::INT64: { - auto* ls = static_cast(st); - TsFileIntStatistic* p = &out->u.int_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::INT64); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = ls->sum_value_; - if (p->base.row_count > 0) { - p->min_int64 = ls->min_value_; - p->max_int64 = ls->max_value_; - p->first_int64 = ls->first_value_; - p->last_int64 = ls->last_value_; - } - break; - } - case common::TIMESTAMP: { - auto* ls = static_cast(st); - TsFileIntStatistic* p = &out->u.int_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::TIMESTAMP); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = ls->sum_value_; - if (p->base.row_count > 0) { - p->min_int64 = ls->min_value_; - p->max_int64 = ls->max_value_; - p->first_int64 = ls->first_value_; - p->last_int64 = ls->last_value_; - } - break; - } - case common::FLOAT: { - auto* fs = static_cast(st); - TsFileFloatStatistic* p = &out->u.float_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::FLOAT); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = static_cast(fs->sum_value_); - if (p->base.row_count > 0) { - p->min_float64 = static_cast(fs->min_value_); - p->max_float64 = 
static_cast(fs->max_value_); - p->first_float64 = static_cast(fs->first_value_); - p->last_float64 = static_cast(fs->last_value_); - } - break; - } - case common::DOUBLE: { - auto* ds = static_cast(st); - TsFileFloatStatistic* p = &out->u.float_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::DOUBLE); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->sum = ds->sum_value_; - if (p->base.row_count > 0) { - p->min_float64 = ds->min_value_; - p->max_float64 = ds->max_value_; - p->first_float64 = ds->first_value_; - p->last_float64 = ds->last_value_; - } - break; - } - case common::STRING: { - auto* ss = static_cast(st); - TsFileStringStatistic* p = &out->u.string_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::STRING); - p->base.row_count = st->get_count(); - p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->str_min = dup_common_string_to_cstr(ss->min_value_); - if (p->str_min == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - p->str_max = dup_common_string_to_cstr(ss->max_value_); - if (p->str_max == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - p->str_first = dup_common_string_to_cstr(ss->first_value_); - if (p->str_first == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - p->str_last = dup_common_string_to_cstr(ss->last_value_); - if (p->str_last == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - break; - } - case common::TEXT: { - auto* ts = static_cast(st); - TsFileTextStatistic* p = &out->u.text_s; - p->base.has_statistic = true; - p->base.type = cpp_stat_type_to_c(common::TEXT); - p->base.row_count = st->get_count(); - 
p->base.start_time = st->start_time_; - p->base.end_time = st->get_end_time(); - p->str_first = dup_common_string_to_cstr(ts->first_value_); - if (p->str_first == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - p->str_last = dup_common_string_to_cstr(ts->last_value_); - if (p->str_last == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - break; - } - default: { - TsFileStatisticBase* b = tsfile_statistic_base(out); - b->has_statistic = true; - b->type = TS_DATATYPE_INVALID; - b->row_count = st->get_count(); - b->start_time = st->start_time_; - b->end_time = st->get_end_time(); - break; - } - } - return common::E_OK; -} - -int fill_timeline_statistic(storage::ITimeseriesIndex* idx, - TimeseriesStatistic* out) { - clear_timeseries_statistic(out); - if (idx == nullptr) { - return common::E_OK; - } - - auto* aligned_idx = dynamic_cast(idx); - if (aligned_idx != nullptr && aligned_idx->time_ts_idx_ != nullptr && - aligned_idx->time_ts_idx_->get_statistic() != nullptr) { - auto* st = aligned_idx->time_ts_idx_->get_statistic(); - TsFileStatisticBase* b = tsfile_statistic_base(out); - b->has_statistic = true; - b->type = TS_DATATYPE_VECTOR; - b->row_count = st->get_count(); - b->start_time = st->start_time_; - b->end_time = st->get_end_time(); - return common::E_OK; - } - - if (idx->get_statistic() != nullptr && - idx->get_time_chunk_meta_list() == nullptr) { - auto* st = idx->get_statistic(); - TsFileStatisticBase* b = tsfile_statistic_base(out); - b->has_statistic = true; - b->type = TS_DATATYPE_VECTOR; - b->row_count = st->get_count(); - b->start_time = st->start_time_; - b->end_time = st->get_end_time(); - return common::E_OK; - } - - auto* list = idx->get_time_chunk_meta_list(); - if (list == nullptr) { - list = idx->get_chunk_meta_list(); - } - if (list == nullptr) { - return common::E_OK; - } - - int64_t row_count = 0; - int64_t 
start_time = 0; - int64_t end_time = 0; - bool has_statistic = false; - for (auto it = list->begin(); it != list->end(); it++) { - auto* chunk_meta = it.get(); - if (chunk_meta == nullptr || chunk_meta->statistic_ == nullptr || - chunk_meta->statistic_->count_ <= 0) { - continue; - } - if (!has_statistic) { - start_time = chunk_meta->statistic_->start_time_; - end_time = chunk_meta->statistic_->end_time_; - has_statistic = true; - } else { - start_time = - std::min(start_time, chunk_meta->statistic_->start_time_); - end_time = std::max(end_time, chunk_meta->statistic_->end_time_); - } - row_count += chunk_meta->statistic_->count_; - } - - if (!has_statistic) { - return common::E_OK; - } - - TsFileStatisticBase* b = tsfile_statistic_base(out); - b->has_statistic = true; - b->type = TS_DATATYPE_VECTOR; - b->row_count = row_count; - b->start_time = start_time; - b->end_time = end_time; - return common::E_OK; -} - -void free_device_timeseries_metadata_entries_partial( - DeviceTimeseriesMetadataEntry* entries, size_t filled_count) { - if (entries == nullptr) { - return; - } - for (size_t i = 0; i < filled_count; i++) { - tsfile_device_id_free_contents(&entries[i].device); - if (entries[i].timeseries != nullptr) { - for (uint32_t j = 0; j < entries[i].timeseries_count; j++) { - free_timeseries_statistic_heap( - &entries[i].timeseries[j].statistic); - free_timeseries_statistic_heap( - &entries[i].timeseries[j].timeline_statistic); - free(entries[i].timeseries[j].measurement_name); - } - free(entries[i].timeseries); - entries[i].timeseries = nullptr; - } - } - free(entries); -} - -/** - * Copies path, table name, and segment strings from IDeviceID into heap - * buffers. On failure, frees any partial allocations and returns E_OOM. 
- */ -int duplicate_ideviceid_to_device_fields(storage::IDeviceID* id, - char** out_path, char** out_table_name, - uint32_t* out_segment_count, - char*** out_segments) { - *out_path = nullptr; - *out_table_name = nullptr; - *out_segment_count = 0; - *out_segments = nullptr; - if (id == nullptr) { - *out_path = strdup(""); - *out_table_name = strdup(""); - if (*out_path == nullptr || *out_table_name == nullptr) { - free(*out_path); - free(*out_table_name); - *out_path = nullptr; - *out_table_name = nullptr; - return common::E_OOM; - } - return common::E_OK; - } - const std::string dname = id->get_device_name(); - *out_path = strdup(dname.c_str()); - if (*out_path == nullptr) { - return common::E_OOM; - } - const std::string tname = id->get_table_name(); - *out_table_name = strdup(tname.c_str()); - if (*out_table_name == nullptr) { - free(*out_path); - *out_path = nullptr; - return common::E_OOM; - } - const int n = id->segment_num(); - if (n <= 0) { - return common::E_OK; - } - auto* seg_arr = - static_cast(malloc(sizeof(char*) * static_cast(n))); - if (seg_arr == nullptr) { - free(*out_table_name); - *out_table_name = nullptr; - free(*out_path); - *out_path = nullptr; - return common::E_OOM; - } - memset(seg_arr, 0, sizeof(char*) * static_cast(n)); - const auto& segs = id->get_segments(); - for (int i = 0; i < n; i++) { - const std::string* ps = - (static_cast(i) < segs.size()) ? segs[i] : nullptr; - const char* lit = (ps != nullptr) ? 
ps->c_str() : "null"; - seg_arr[i] = strdup(lit); - if (seg_arr[i] == nullptr) { - for (int j = 0; j < i; j++) { - free(seg_arr[j]); - } - free(seg_arr); - free(*out_table_name); - *out_table_name = nullptr; - free(*out_path); - *out_path = nullptr; - return common::E_OOM; - } - } - *out_segment_count = static_cast(n); - *out_segments = seg_arr; - return common::E_OK; -} - -int fill_device_id_from_ideviceid(storage::IDeviceID* id, DeviceID* out) { - memset(out, 0, sizeof(*out)); - return duplicate_ideviceid_to_device_fields( - id, &out->path, &out->table_name, &out->segment_count, &out->segments); -} - -void clear_metadata_entry_device_only(DeviceTimeseriesMetadataEntry* e) { - if (e == nullptr) { - return; - } - tsfile_device_id_free_contents(&e->device); -} - -ERRNO populate_c_metadata_map_from_cpp( - storage::DeviceTimeseriesMetadataMap& cpp_map, - DeviceTimeseriesMetadataMap* out_map) { - if (cpp_map.empty()) { - return common::E_OK; - } - const uint32_t dev_n = static_cast(cpp_map.size()); - auto* entries = static_cast( - malloc(sizeof(DeviceTimeseriesMetadataEntry) * dev_n)); - if (entries == nullptr) { - return common::E_OOM; - } - memset(entries, 0, sizeof(DeviceTimeseriesMetadataEntry) * dev_n); - size_t di = 0; - for (const auto& kv : cpp_map) { - DeviceTimeseriesMetadataEntry& e = entries[di]; - const int dup_rc = fill_device_id_from_ideviceid( - kv.first ? 
kv.first.get() : nullptr, &e.device); - if (dup_rc != common::E_OK) { - free_device_timeseries_metadata_entries_partial(entries, di); - return dup_rc; - } - const auto& vec = kv.second; - uint32_t n_ts = 0; - for (const auto& idx_nz : vec) { - if (idx_nz != nullptr) { - n_ts++; - } - } - e.timeseries_count = n_ts; - if (e.timeseries_count == 0) { - e.timeseries = nullptr; - di++; - continue; - } - e.timeseries = static_cast( - malloc(sizeof(TimeseriesMetadata) * e.timeseries_count)); - if (e.timeseries == nullptr) { - clear_metadata_entry_device_only(&e); - free_device_timeseries_metadata_entries_partial(entries, di); - return common::E_OOM; - } - memset(e.timeseries, 0, - sizeof(TimeseriesMetadata) * e.timeseries_count); - uint32_t slot = 0; - for (const auto& idx : vec) { - if (idx == nullptr) { - continue; - } - TimeseriesMetadata& m = e.timeseries[slot]; - common::String mn = idx->get_measurement_name(); - m.measurement_name = strdup(mn.to_std_string().c_str()); - if (m.measurement_name == nullptr) { - for (uint32_t u = 0; u < slot; u++) { - free_timeseries_statistic_heap(&e.timeseries[u].statistic); - free(e.timeseries[u].measurement_name); - } - free(e.timeseries); - e.timeseries = nullptr; - clear_metadata_entry_device_only(&e); - free_device_timeseries_metadata_entries_partial(entries, di); - return common::E_OOM; - } - auto* aligned_idx = - dynamic_cast(idx.get()); - if (aligned_idx != nullptr && - aligned_idx->value_ts_idx_ != nullptr) { - m.data_type = static_cast( - aligned_idx->value_ts_idx_->get_data_type()); - } else { - m.data_type = static_cast(idx->get_data_type()); - } - storage::Statistic* st = idx->get_statistic(); - int32_t chunk_cnt = 0; - auto* cl = aligned_idx != nullptr ? 
idx->get_value_chunk_meta_list() - : idx->get_chunk_meta_list(); - if (cl != nullptr) { - chunk_cnt = static_cast(cl->size()); - } - m.chunk_meta_count = chunk_cnt; - const int st_rc = fill_timeseries_statistic(st, &m.statistic); - if (st_rc != common::E_OK) { - for (uint32_t u = 0; u < slot; u++) { - free_timeseries_statistic_heap(&e.timeseries[u].statistic); - free_timeseries_statistic_heap( - &e.timeseries[u].timeline_statistic); - free(e.timeseries[u].measurement_name); - } - free_timeseries_statistic_heap(&m.statistic); - free_timeseries_statistic_heap(&m.timeline_statistic); - free(m.measurement_name); - free(e.timeseries); - e.timeseries = nullptr; - clear_metadata_entry_device_only(&e); - free_device_timeseries_metadata_entries_partial(entries, di); - return st_rc; - } - const int timeline_st_rc = - fill_timeline_statistic(idx.get(), &m.timeline_statistic); - if (timeline_st_rc != common::E_OK) { - for (uint32_t u = 0; u < slot; u++) { - free_timeseries_statistic_heap(&e.timeseries[u].statistic); - free_timeseries_statistic_heap( - &e.timeseries[u].timeline_statistic); - free(e.timeseries[u].measurement_name); - } - free_timeseries_statistic_heap(&m.statistic); - free_timeseries_statistic_heap(&m.timeline_statistic); - free(m.measurement_name); - free(e.timeseries); - e.timeseries = nullptr; - clear_metadata_entry_device_only(&e); - free_device_timeseries_metadata_entries_partial(entries, di); - return timeline_st_rc; - } - slot++; - } - di++; - } - out_map->entries = entries; - out_map->device_count = dev_n; - return common::E_OK; -} - -} // namespace - -void tsfile_free_device_id_array(DeviceID* devices, uint32_t length) { - if (devices == nullptr) { - return; - } - for (uint32_t i = 0; i < length; i++) { - tsfile_device_id_free_contents(&devices[i]); - } - free(devices); -} - -ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, - uint32_t* out_length) { - if (reader == nullptr || out_devices == nullptr || out_length == 
nullptr) { - return common::E_INVALID_ARG; - } - *out_devices = nullptr; - *out_length = 0; - auto* r = static_cast(reader); - const auto ids = r->get_all_devices(); - if (ids.empty()) { - return common::E_OK; - } - auto* arr = static_cast(malloc(sizeof(DeviceID) * ids.size())); - if (arr == nullptr) { - return common::E_OOM; - } - memset(arr, 0, sizeof(DeviceID) * ids.size()); - for (size_t i = 0; i < ids.size(); i++) { - const int rc = fill_device_id_from_ideviceid(ids[i].get(), &arr[i]); - if (rc != common::E_OK) { - tsfile_free_device_id_array(arr, static_cast(i)); - return rc; - } - } - *out_devices = arr; - *out_length = static_cast(ids.size()); - return common::E_OK; -} - -ERRNO tsfile_reader_get_timeseries_metadata_all( - TsFileReader reader, DeviceTimeseriesMetadataMap* out_map) { - if (reader == nullptr || out_map == nullptr) { - return common::E_INVALID_ARG; - } - out_map->entries = nullptr; - out_map->device_count = 0; - auto* r = static_cast(reader); - storage::DeviceTimeseriesMetadataMap cpp_map = r->get_timeseries_metadata(); - return populate_c_metadata_map_from_cpp(cpp_map, out_map); -} - -ERRNO tsfile_reader_get_timeseries_metadata_for_devices( - TsFileReader reader, const DeviceID* devices, uint32_t length, - DeviceTimeseriesMetadataMap* out_map) { - if (reader == nullptr || out_map == nullptr) { - return common::E_INVALID_ARG; - } - out_map->entries = nullptr; - out_map->device_count = 0; - if (length == 0) { - return common::E_OK; - } - if (devices == nullptr) { - return common::E_INVALID_ARG; - } - for (uint32_t i = 0; i < length; i++) { - if (devices[i].path == nullptr) { - return common::E_INVALID_ARG; - } - } - auto* r = static_cast(reader); - std::vector> query_ids; - query_ids.reserve(length); - for (uint32_t i = 0; i < length; i++) { - query_ids.push_back(std::make_shared( - std::string(devices[i].path))); - } - storage::DeviceTimeseriesMetadataMap cpp_map = - r->get_timeseries_metadata(query_ids); - return 
populate_c_metadata_map_from_cpp(cpp_map, out_map); -} - -void tsfile_free_device_timeseries_metadata_map( - DeviceTimeseriesMetadataMap* map) { - if (map == nullptr) { - return; - } - free_device_timeseries_metadata_entries_partial(map->entries, - map->device_count); - map->entries = nullptr; - map->device_count = 0; -} - // delete pointer void _free_tsfile_ts_record(TsRecord* record) { if (*record != nullptr) { @@ -1594,100 +917,62 @@ ResultSet _tsfile_reader_query_device(TsFileReader reader, return qds; } -// ---------- Tag Filter API ---------- +// ============== Tag Filter API Implementation ============== -TagFilterHandle tsfile_tag_filter_create(TsFileReader reader, - const char* table_name, - const char* column_name, - const char* value, TagFilterOp op, - ERRNO* err_code) { - auto* r = static_cast(reader); - auto schema = r->get_table_schema(table_name); - if (!schema) { - *err_code = common::E_INVALID_ARG; - return nullptr; +// Helper macro to avoid repetition in tag filter factory functions. +// The shared_ptr must stay alive while TagFilterBuilder accesses the schema. 
+#define DEFINE_TAG_FILTER_FACTORY(name, method) \ + TagFilterHandle tsfile_tag_filter_##name( \ + TsFileReader reader, const char* table_name, const char* column_name, \ + const char* value) { \ + auto* r = static_cast(reader); \ + auto schema = r->get_table_schema(table_name); \ + if (!schema) return nullptr; \ + storage::TagFilterBuilder builder(schema.get()); \ + return builder.method(column_name, value); \ } - storage::TagFilterBuilder builder(schema.get()); - storage::Filter* filter = nullptr; - switch (op) { - case TAG_FILTER_EQ: - filter = builder.eq(column_name, value); - break; - case TAG_FILTER_NEQ: - filter = builder.neq(column_name, value); - break; - case TAG_FILTER_LT: - filter = builder.lt(column_name, value); - break; - case TAG_FILTER_LTEQ: - filter = builder.lteq(column_name, value); - break; - case TAG_FILTER_GT: - filter = builder.gt(column_name, value); - break; - case TAG_FILTER_GTEQ: - filter = builder.gteq(column_name, value); - break; - case TAG_FILTER_REGEXP: - filter = builder.reg_exp(column_name, value); - break; - case TAG_FILTER_NOT_REGEXP: - filter = builder.not_reg_exp(column_name, value); - break; - default: - *err_code = common::E_INVALID_ARG; - return nullptr; - } - *err_code = common::E_OK; - return static_cast(filter); -} -TagFilterHandle tsfile_tag_filter_between(TsFileReader reader, - const char* table_name, - const char* column_name, - const char* lower, const char* upper, - bool is_not, ERRNO* err_code) { - auto* r = static_cast(reader); - auto schema = r->get_table_schema(table_name); - if (!schema) { - *err_code = common::E_INVALID_ARG; - return nullptr; - } - storage::TagFilterBuilder builder(schema.get()); - storage::Filter* filter = - is_not ? 
builder.not_between_and(column_name, lower, upper) - : builder.between_and(column_name, lower, upper); - *err_code = common::E_OK; - return static_cast(filter); -} +DEFINE_TAG_FILTER_FACTORY(eq, eq) +DEFINE_TAG_FILTER_FACTORY(neq, neq) +DEFINE_TAG_FILTER_FACTORY(lt, lt) +DEFINE_TAG_FILTER_FACTORY(lteq, lteq) +DEFINE_TAG_FILTER_FACTORY(gt, gt) +DEFINE_TAG_FILTER_FACTORY(gteq, gteq) + +#undef DEFINE_TAG_FILTER_FACTORY TagFilterHandle tsfile_tag_filter_and(TagFilterHandle left, TagFilterHandle right) { - return static_cast(storage::TagFilterBuilder::and_filter( + if (!left || !right) return nullptr; + return storage::TagFilterBuilder::and_filter( static_cast(left), - static_cast(right))); + static_cast(right)); } TagFilterHandle tsfile_tag_filter_or(TagFilterHandle left, TagFilterHandle right) { - return static_cast(storage::TagFilterBuilder::or_filter( + if (!left || !right) return nullptr; + return storage::TagFilterBuilder::or_filter( static_cast(left), - static_cast(right))); + static_cast(right)); } TagFilterHandle tsfile_tag_filter_not(TagFilterHandle filter) { - return static_cast(storage::TagFilterBuilder::not_filter( - static_cast(filter))); + if (!filter) return nullptr; + return storage::TagFilterBuilder::not_filter( + static_cast(filter)); } void tsfile_tag_filter_free(TagFilterHandle filter) { - delete static_cast(filter); + if (filter) { + delete static_cast(filter); + } } -ResultSet tsfile_query_table_with_tag_filter( +ResultSet tsfile_query_table_batch_with_filter( TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, - TagFilterHandle tag_filter, int batch_size, ERRNO* err_code) { + int batch_size, TagFilterHandle tag_filter, ERRNO* err_code) { auto* r = static_cast(reader); storage::ResultSet* table_result_set = nullptr; std::vector column_names; diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index ae3e28eed..98ca32910 100644 --- 
a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -104,135 +104,6 @@ typedef struct device_schema { int timeseries_num; } DeviceSchema; -/** - * @brief Common header for all statistic variants (first member of each - * TsFile*Statistic struct; also aliases the start of TimeseriesStatistic::u). - * - * When @p has_statistic is false, @p type is undefined. Otherwise @p type - * selects which @ref TimeseriesStatisticUnion member is active (INT32/DATE/ - * INT64/TIMESTAMP share @c int_s). @c sum exists only on @c bool_s, @c int_s, - * and @c float_s. Heap strings in string_s/text_s are - * freed by tsfile_free_device_timeseries_metadata_map only. - */ -typedef struct TsFileStatisticBase { - bool has_statistic; - TSDataType type; - int32_t row_count; - int64_t start_time; - int64_t end_time; -} TsFileStatisticBase; - -typedef struct TsFileBoolStatistic { - TsFileStatisticBase base; - double sum; - bool first_bool; - bool last_bool; -} TsFileBoolStatistic; - -typedef struct TsFileIntStatistic { - TsFileStatisticBase base; - double sum; - int64_t min_int64; - int64_t max_int64; - int64_t first_int64; - int64_t last_int64; -} TsFileIntStatistic; - -typedef struct TsFileFloatStatistic { - TsFileStatisticBase base; - double sum; - double min_float64; - double max_float64; - double first_float64; - double last_float64; -} TsFileFloatStatistic; - -typedef struct TsFileStringStatistic { - TsFileStatisticBase base; - char* str_min; - char* str_max; - char* str_first; - char* str_last; -} TsFileStringStatistic; - -typedef struct TsFileTextStatistic { - TsFileStatisticBase base; - char* str_first; - char* str_last; -} TsFileTextStatistic; - -/** - * @brief One of the typed layouts; active member follows @c base.type. 
- */ -typedef union TimeseriesStatisticUnion { - TsFileBoolStatistic bool_s; - TsFileIntStatistic int_s; - TsFileFloatStatistic float_s; - TsFileStringStatistic string_s; - TsFileTextStatistic text_s; -} TimeseriesStatisticUnion; - -/** - * @brief Aggregated statistic for one timeseries (subset of C++ Statistic). - * - * Read common fields via @c tsfile_statistic_base(s). Type-specific fields - * via @c s->u.int_s, @c s->u.float_s, etc., per @c base.type. - */ -typedef struct TimeseriesStatistic { - TimeseriesStatisticUnion u; -} TimeseriesStatistic; - -/** Pointer to the common header at the start of @p s->u (any active arm). */ -#define tsfile_statistic_base(s) ((TsFileStatisticBase*)&(s)->u) - -/** - * @brief One measurement's metadata as exposed to C. - */ -typedef struct TimeseriesMetadata { - char* measurement_name; - TSDataType data_type; - int32_t chunk_meta_count; - TimeseriesStatistic statistic; - TimeseriesStatistic timeline_statistic; -} TimeseriesMetadata; - -/** - * @brief Device identity from IDeviceID (path, table name, segments). - * - * Heap fields are freed by tsfile_device_id_free_contents or - * tsfile_free_device_id_array, or as part of - * tsfile_free_device_timeseries_metadata_map for entries. - */ -typedef struct DeviceID { - char* path; - char* table_name; - uint32_t segment_count; - char** segments; -} DeviceID; - -/** - * @brief One device's timeseries metadata list plus DeviceID. - * - * @p device heap fields freed by tsfile_free_device_timeseries_metadata_map. - */ -typedef struct DeviceTimeseriesMetadataEntry { - DeviceID device; - TimeseriesMetadata* timeseries; - uint32_t timeseries_count; -} DeviceTimeseriesMetadataEntry; - -/** - * @brief Map device -> list of TimeseriesMetadata (C layout with explicit - * counts). - */ -typedef struct DeviceTimeseriesMetadataMap { - DeviceTimeseriesMetadataEntry* entries; - uint32_t device_count; -} DeviceTimeseriesMetadataMap; - -/** Frees path, table_name, and segments inside @p d; zeros @p d. 
*/ -void tsfile_device_id_free_contents(DeviceID* d); - typedef struct result_set_meta_data { char** column_names; TSDataType* data_types; @@ -253,7 +124,6 @@ typedef void* Tablet; typedef void* TsRecord; typedef void* ResultSet; -typedef void* TagFilterHandle; typedef struct arrow_schema { // Array type description @@ -446,37 +316,6 @@ ERRNO tsfile_writer_close(TsFileWriter writer); */ ERRNO tsfile_reader_close(TsFileReader reader); -/** - * @brief Lists all devices (path, table name, segments from IDeviceID). - * - * @param out_devices [out] Allocated array; caller frees with - * tsfile_free_device_id_array. - */ -ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, - uint32_t* out_length); - -void tsfile_free_device_id_array(DeviceID* devices, uint32_t length); - -/** - * @brief Timeseries metadata for all devices in the file. - */ -ERRNO tsfile_reader_get_timeseries_metadata_all( - TsFileReader reader, DeviceTimeseriesMetadataMap* out_map); - -/** - * @brief Timeseries metadata for a subset of devices. - * - * @param devices NULL and length>0 is E_INVALID_ARG. length==0: empty result - * (E_OK); @p devices is not read. - * For each entry, @p path must be non-NULL (canonical device path). - */ -ERRNO tsfile_reader_get_timeseries_metadata_for_devices( - TsFileReader reader, const DeviceID* devices, uint32_t length, - DeviceTimeseriesMetadataMap* out_map); - -void tsfile_free_device_timeseries_metadata_map( - DeviceTimeseriesMetadataMap* map); - /*--------------------------Tablet API------------------------ */ /** @@ -677,16 +516,16 @@ ResultSet tsfile_reader_query_tree_by_row(TsFileReader reader, * @param err_code [out] Error code. E_OK(0) on success. * @return ResultSet handle on success; NULL on failure. 
*/ -ResultSet tsfile_reader_query_table_by_row( - TsFileReader reader, const char* table_name, char** column_names, - int column_names_len, int offset, int limit, TagFilterHandle tag_filter, - int batch_size, ERRNO* err_code); +ResultSet tsfile_reader_query_table_by_row(TsFileReader reader, + const char* table_name, + char** column_names, + int column_names_len, int offset, + int limit, ERRNO* err_code); ResultSet tsfile_query_table_batch(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, - TagFilterHandle tag_filter, int batch_size, - ERRNO* err_code); + int batch_size, ERRNO* err_code); // ResultSet tsfile_reader_query_device(TsFileReader reader, // const char* device_name, // char** sensor_name, uint32_t @@ -861,82 +700,6 @@ TableSchema* tsfile_reader_get_all_table_schemas(TsFileReader reader, DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, uint32_t* size); -// ---------- Tag Filter API ---------- - -/** - * @brief Tag filter comparison operators. - */ -typedef enum { - TAG_FILTER_EQ = 0, - TAG_FILTER_NEQ = 1, - TAG_FILTER_LT = 2, - TAG_FILTER_LTEQ = 3, - TAG_FILTER_GT = 4, - TAG_FILTER_GTEQ = 5, - TAG_FILTER_REGEXP = 6, - TAG_FILTER_NOT_REGEXP = 7, -} TagFilterOp; - -/** - * @brief Create a tag filter with a comparison operator. - * - * @param reader [in] TsFileReader handle (used to resolve column name to - * index). - * @param table_name [in] Table name whose schema defines the TAG columns. - * @param column_name [in] Name of the TAG column to filter on. - * @param value [in] Comparison value (string). - * @param op [in] Comparison operator (TagFilterOp). - * @param err_code [out] Error code. E_OK(0) on success. - * @return TagFilterHandle on success; NULL on failure. 
- */ -TagFilterHandle tsfile_tag_filter_create(TsFileReader reader, - const char* table_name, - const char* column_name, - const char* value, TagFilterOp op, - ERRNO* err_code); - -/** - * @brief Create a BETWEEN tag filter (lower <= column <= upper). - */ -TagFilterHandle tsfile_tag_filter_between(TsFileReader reader, - const char* table_name, - const char* column_name, - const char* lower, const char* upper, - bool is_not, ERRNO* err_code); - -/** - * @brief Combine two tag filters with AND. - */ -TagFilterHandle tsfile_tag_filter_and(TagFilterHandle left, - TagFilterHandle right); - -/** - * @brief Combine two tag filters with OR. - */ -TagFilterHandle tsfile_tag_filter_or(TagFilterHandle left, - TagFilterHandle right); - -/** - * @brief Negate a tag filter. - */ -TagFilterHandle tsfile_tag_filter_not(TagFilterHandle filter); - -/** - * @brief Free a tag filter and all its children. - */ -void tsfile_tag_filter_free(TagFilterHandle filter); - -/** - * @brief Query table with tag filter. - * - * @param batch_size <= 0 means row-by-row return mode, - * > 0 means return TsBlock with the specified block size. - */ -ResultSet tsfile_query_table_with_tag_filter( - TsFileReader reader, const char* table_name, char** columns, - uint32_t column_num, Timestamp start_time, Timestamp end_time, - TagFilterHandle tag_filter, int batch_size, ERRNO* err_code); - // Close and free resource. void free_tablet(Tablet* tablet); void free_tsfile_result_set(ResultSet* result_set); @@ -1026,6 +789,79 @@ ResultSet _tsfile_reader_query_device(TsFileReader reader, // Free row record. void _free_tsfile_ts_record(TsRecord* record); +// ============== Tag Filter API ============== + +typedef void* TagFilterHandle; + +/** + * @brief Create a tag equality filter: column == value. + * + * @param reader [in] Valid TsFileReader handle (used to resolve column index). + * @param table_name [in] Target table name. + * @param column_name [in] Tag column name. 
+ * @param value [in] Value to compare against. + * @return TagFilterHandle on success, NULL on failure. + */ +TagFilterHandle tsfile_tag_filter_eq(TsFileReader reader, + const char* table_name, + const char* column_name, + const char* value); + +TagFilterHandle tsfile_tag_filter_neq(TsFileReader reader, + const char* table_name, + const char* column_name, + const char* value); + +TagFilterHandle tsfile_tag_filter_lt(TsFileReader reader, + const char* table_name, + const char* column_name, + const char* value); + +TagFilterHandle tsfile_tag_filter_lteq(TsFileReader reader, + const char* table_name, + const char* column_name, + const char* value); + +TagFilterHandle tsfile_tag_filter_gt(TsFileReader reader, + const char* table_name, + const char* column_name, + const char* value); + +TagFilterHandle tsfile_tag_filter_gteq(TsFileReader reader, + const char* table_name, + const char* column_name, + const char* value); + +/** + * @brief Logical AND of two tag filters. Takes ownership of left and right. + */ +TagFilterHandle tsfile_tag_filter_and(TagFilterHandle left, + TagFilterHandle right); + +/** + * @brief Logical OR of two tag filters. Takes ownership of left and right. + */ +TagFilterHandle tsfile_tag_filter_or(TagFilterHandle left, + TagFilterHandle right); + +/** + * @brief Logical NOT of a tag filter. Takes ownership of filter. + */ +TagFilterHandle tsfile_tag_filter_not(TagFilterHandle filter); + +/** + * @brief Free a tag filter handle. + */ +void tsfile_tag_filter_free(TagFilterHandle filter); + +/** + * @brief Batch query with tag filter support. 
+ */ +ResultSet tsfile_query_table_batch_with_filter( + TsFileReader reader, const char* table_name, char** columns, + uint32_t column_num, Timestamp start_time, Timestamp end_time, + int batch_size, TagFilterHandle tag_filter, ERRNO* err_code); + #ifdef __cplusplus } #endif diff --git a/cpp/src/encoding/decoder.h b/cpp/src/encoding/decoder.h index c290b5791..24455ca01 100644 --- a/cpp/src/encoding/decoder.h +++ b/cpp/src/encoding/decoder.h @@ -21,6 +21,7 @@ #define ENCODING_DECODER_H #include "common/allocator/byte_stream.h" +#include "common/db_common.h" namespace storage { @@ -37,6 +38,140 @@ class Decoder { virtual int read_double(double& ret_value, common::ByteStream& in) = 0; virtual int read_String(common::String& ret_value, common::PageArena& pa, common::ByteStream& in) = 0; + + virtual int read_batch_int32(int32_t* out, int capacity, int& actual, + common::ByteStream& in) { + actual = 0; + int ret = common::E_OK; + int32_t val; + while (actual < capacity && has_remaining(in)) { + ret = read_int32(val, in); + if (ret != common::E_OK) { + return ret; + } + out[actual++] = val; + } + return common::E_OK; + } + + virtual int read_batch_int64(int64_t* out, int capacity, int& actual, + common::ByteStream& in) { + actual = 0; + int ret = common::E_OK; + int64_t val; + while (actual < capacity && has_remaining(in)) { + ret = read_int64(val, in); + if (ret != common::E_OK) { + return ret; + } + out[actual++] = val; + } + return common::E_OK; + } + + virtual int read_batch_float(float* out, int capacity, int& actual, + common::ByteStream& in) { + actual = 0; + int ret = common::E_OK; + float val; + while (actual < capacity && has_remaining(in)) { + ret = read_float(val, in); + if (ret != common::E_OK) { + return ret; + } + out[actual++] = val; + } + return common::E_OK; + } + + virtual int read_batch_double(double* out, int capacity, int& actual, + common::ByteStream& in) { + actual = 0; + int ret = common::E_OK; + double val; + while (actual < capacity && 
has_remaining(in)) { + ret = read_double(val, in); + if (ret != common::E_OK) { + return ret; + } + out[actual++] = val; + } + return common::E_OK; + } + + virtual int skip_int32(int count, int& skipped, common::ByteStream& in) { + skipped = 0; + int ret = common::E_OK; + int32_t dummy; + while (skipped < count && has_remaining(in)) { + ret = read_int32(dummy, in); + if (ret != common::E_OK) { + return ret; + } + ++skipped; + } + return common::E_OK; + } + + virtual int skip_int64(int count, int& skipped, common::ByteStream& in) { + skipped = 0; + int ret = common::E_OK; + int64_t dummy; + while (skipped < count && has_remaining(in)) { + ret = read_int64(dummy, in); + if (ret != common::E_OK) { + return ret; + } + ++skipped; + } + return common::E_OK; + } + + virtual int skip_float(int count, int& skipped, common::ByteStream& in) { + skipped = 0; + int ret = common::E_OK; + float dummy; + while (skipped < count && has_remaining(in)) { + ret = read_float(dummy, in); + if (ret != common::E_OK) { + return ret; + } + ++skipped; + } + return common::E_OK; + } + + virtual int skip_double(int count, int& skipped, common::ByteStream& in) { + skipped = 0; + int ret = common::E_OK; + double dummy; + while (skipped < count && has_remaining(in)) { + ret = read_double(dummy, in); + if (ret != common::E_OK) { + return ret; + } + ++skipped; + } + return common::E_OK; + } + + // Block-level filter check: peek the next block header and compute + // the value range [block_min, block_max] without decoding. + // Returns true if a block was peeked; false if not supported or no data. + // After peeking, caller must either: + // - Call skip_peeked_block_int64() to skip the block + // - Call read_batch_int64() which will use the peeked header + virtual bool peek_next_block_range_int64(common::ByteStream& in, + int64_t& block_min, + int64_t& block_max, + int& block_count) { + return false; + } + + // Skip the block whose header was already consumed by peek. 
+ virtual int skip_peeked_block_int64(common::ByteStream& in, int& skipped) { + return common::E_NOT_SUPPORT; + } }; } // end namespace storage diff --git a/cpp/src/encoding/dictionary_decoder.h b/cpp/src/encoding/dictionary_decoder.h index 2962c66ba..5f64b5873 100644 --- a/cpp/src/encoding/dictionary_decoder.h +++ b/cpp/src/encoding/dictionary_decoder.h @@ -73,8 +73,7 @@ class DictionaryDecoder : public Decoder { if (entry_index_.empty()) { init_map(buffer); } - int32_t code = 0; - value_decoder_.read_int(code, buffer); + int code = value_decoder_.read_int(buffer); return entry_index_[code]; } diff --git a/cpp/src/encoding/dictionary_encoder.h b/cpp/src/encoding/dictionary_encoder.h index be5f78a09..fad4ef68f 100644 --- a/cpp/src/encoding/dictionary_encoder.h +++ b/cpp/src/encoding/dictionary_encoder.h @@ -83,7 +83,7 @@ class DictionaryEncoder : public Encoder { if (entry_index_.count(value) == 0) { index_entry_.push_back(value); map_size_ = map_size_ + value.length(); - entry_index_[value] = static_cast(index_entry_.size()) - 1; + entry_index_[value] = entry_index_.size(); } values_encoder_.encode(entry_index_[value], out); return common::E_OK; diff --git a/cpp/src/encoding/encoder.h b/cpp/src/encoding/encoder.h index 921686446..386129f6e 100644 --- a/cpp/src/encoding/encoder.h +++ b/cpp/src/encoding/encoder.h @@ -48,6 +48,81 @@ class Encoder { * @return the maximal size of possible memory occupied by current encoder */ virtual int get_max_byte_size() = 0; + + /* + * Batch encoding interfaces. + * Default implementations fall back to per-value encode(). + * Subclasses may override for better performance. 
+ */ + virtual int encode_batch(const bool* values, uint32_t count, + common::ByteStream& out_stream) { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + if (RET_FAIL(encode(values[i], out_stream))) { + return ret; + } + } + return ret; + } + virtual int encode_batch(const int32_t* values, uint32_t count, + common::ByteStream& out_stream) { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + if (RET_FAIL(encode(values[i], out_stream))) { + return ret; + } + } + return ret; + } + virtual int encode_batch(const int64_t* values, uint32_t count, + common::ByteStream& out_stream) { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + if (RET_FAIL(encode(values[i], out_stream))) { + return ret; + } + } + return ret; + } + virtual int encode_batch(const float* values, uint32_t count, + common::ByteStream& out_stream) { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + if (RET_FAIL(encode(values[i], out_stream))) { + return ret; + } + } + return ret; + } + virtual int encode_batch(const double* values, uint32_t count, + common::ByteStream& out_stream) { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + if (RET_FAIL(encode(values[i], out_stream))) { + return ret; + } + } + return ret; + } + + // Batch encode strings from a contiguous buffer with offset array + // (Arrow-style layout from Tablet::StringColumn). + // string[i] = buffer + offsets[start_idx + i], length = offsets[start_idx + + // i + 1] - offsets[start_idx + i]. 
+ virtual int encode_string_batch(const char* buffer, const uint32_t* offsets, + uint32_t start_idx, uint32_t count, + common::ByteStream& out_stream) { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + uint32_t idx = start_idx + i; + uint32_t len = offsets[idx + 1] - offsets[idx]; + common::String val(buffer + offsets[idx], len); + if (RET_FAIL(encode(val, out_stream))) { + return ret; + } + } + return ret; + } }; } // end namespace storage diff --git a/cpp/src/encoding/gorilla_decoder.h b/cpp/src/encoding/gorilla_decoder.h index 5684561aa..aaafc0bd0 100644 --- a/cpp/src/encoding/gorilla_decoder.h +++ b/cpp/src/encoding/gorilla_decoder.h @@ -30,6 +30,142 @@ namespace storage { +// ── Raw-pointer bit reader ──────────────────────────────────────────────── +// Operates directly on a contiguous byte array, bypassing ByteStream's +// per-byte read_buf() overhead (atomic loads, page boundary checks, memcpy). + +struct GorillaBitReader { + const uint8_t* data; + uint32_t pos; // next byte index to load + uint32_t data_len; // total bytes + int bits; // remaining bits in cur_byte (0..8) + uint8_t cur_byte; + + FORCE_INLINE void load_byte_if_empty() { + if (bits == 0 && pos < data_len) { + cur_byte = data[pos++]; + bits = 8; + } + } + + FORCE_INLINE bool read_bit() { + bool bit = ((cur_byte >> (bits - 1)) & 1) == 1; + bits--; + load_byte_if_empty(); + return bit; + } + + FORCE_INLINE int64_t read_long(int n) { + int64_t value = 0; + while (n > 0) { + if (n > bits || n == 8) { + value = (value << bits) + (cur_byte & ((1 << bits) - 1)); + n -= bits; + bits = 0; + } else { + value = + (value << n) + ((cur_byte >> (bits - n)) & ((1 << n) - 1)); + bits -= n; + n = 0; + } + load_byte_if_empty(); + } + return value; + } + + FORCE_INLINE uint8_t read_control_bits(int max_bits) { + uint8_t value = 0x00; + for (int i = 0; i < max_bits; i++) { + value <<= 1; + if (read_bit()) { + value |= 0x01; + } else { + break; + } + } + return value; + } +}; + +// ── Templated 
raw-pointer decode helpers ────────────────────────────────── + +template +struct GorillaRawOps { + static FORCE_INLINE T read_next(GorillaBitReader& r, T& stored_value, + int& stored_leading_zeros, + int& stored_trailing_zeros); +}; + +template <> +struct GorillaRawOps { + static constexpr int VALUE_BITS = VALUE_BITS_LENGTH_32BIT; + + static FORCE_INLINE int32_t read_next(GorillaBitReader& r, + int32_t& stored_value, + int& stored_leading_zeros, + int& stored_trailing_zeros) { + uint8_t ctrl = r.read_control_bits(2); + switch (ctrl) { + case 3: { + stored_leading_zeros = + (int)r.read_long(LEADING_ZERO_BITS_LENGTH_32BIT); + uint8_t sig = + (uint8_t)r.read_long(MEANINGFUL_XOR_BITS_LENGTH_32BIT); + sig++; + stored_trailing_zeros = VALUE_BITS - sig - stored_leading_zeros; + } + // fallthrough + case 2: { + int32_t xor_value = (int32_t)r.read_long( + VALUE_BITS - stored_leading_zeros - stored_trailing_zeros); + xor_value = static_cast(xor_value) + << stored_trailing_zeros; + stored_value ^= xor_value; + } + // fallthrough + default: + return stored_value; + } + return stored_value; + } +}; + +template <> +struct GorillaRawOps { + static constexpr int VALUE_BITS = VALUE_BITS_LENGTH_64BIT; + + static FORCE_INLINE int64_t read_next(GorillaBitReader& r, + int64_t& stored_value, + int& stored_leading_zeros, + int& stored_trailing_zeros) { + uint8_t ctrl = r.read_control_bits(2); + switch (ctrl) { + case 3: { + stored_leading_zeros = + (int)r.read_long(LEADING_ZERO_BITS_LENGTH_64BIT); + uint8_t sig = + (uint8_t)r.read_long(MEANINGFUL_XOR_BITS_LENGTH_64BIT); + sig++; + stored_trailing_zeros = VALUE_BITS - sig - stored_leading_zeros; + } + // fallthrough + case 2: { + int64_t xor_value = r.read_long( + VALUE_BITS - stored_leading_zeros - stored_trailing_zeros); + xor_value = static_cast(xor_value) + << stored_trailing_zeros; + stored_value ^= xor_value; + } + // fallthrough + default: + return stored_value; + } + return stored_value; + } +}; + +// 
────────────────────────────────────────────────────────────────────────── + template class GorillaDecoder : public Decoder { public: @@ -127,6 +263,152 @@ class GorillaDecoder : public Decoder { int read_String(common::String& ret_value, common::PageArena& pa, common::ByteStream& in) override; + // Batch overrides — declared here, defined after template specializations + int read_batch_int32(int32_t* out, int capacity, int& actual, + common::ByteStream& in) override; + int read_batch_int64(int64_t* out, int capacity, int& actual, + common::ByteStream& in) override; + int skip_int32(int count, int& skipped, common::ByteStream& in) override; + int skip_int64(int count, int& skipped, common::ByteStream& in) override; + + protected: + // ── Batch decode using raw pointer (bypasses ByteStream) ───────────── + // The decode() contract: + // stored_value_ holds the "next" value to be returned. + // decode() returns stored_value_, then advances via cache_next(). + // has_next_==false means the ending sentinel was hit. + // + // batch_decode_raw replicates this logic using GorillaBitReader on the + // wrapped contiguous buffer, then syncs state back to ByteStream. 
+ int batch_decode_raw(T* out, int capacity, int& actual, T ending, + common::ByteStream& in) { + if (!in.is_wrapped()) { + return batch_decode_fallback(out, capacity, actual, ending, in); + } + + const uint8_t* base = + (const uint8_t*)in.get_wrapped_buf() + in.read_pos(); + uint32_t remain = in.remaining_size(); + + GorillaBitReader r; + r.data = base; + r.pos = 0; + r.data_len = remain; + r.bits = bits_left_; + r.cur_byte = buffer_; + + actual = 0; + + // Bootstrap first value if needed (mirrors decode()'s first-call path) + if (UNLIKELY(!first_value_was_read_)) { + if (r.bits == 0 && r.pos >= r.data_len) goto done; + r.load_byte_if_empty(); + stored_value_ = (T)r.read_long(GorillaRawOps::VALUE_BITS); + first_value_was_read_ = true; + // Save the first value before cache_next mutates stored_value_ + T first_value = stored_value_; + // cache_next: read_next then check ending + GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_, + stored_trailing_zeros_); + if (stored_value_ == ending) { + has_next_ = false; + } else { + has_next_ = true; + } + // Output the first value + out[actual++] = first_value; + if (!has_next_ || actual >= capacity) goto done; + } + + // Main batch loop + while (actual < capacity && has_next_) { + out[actual++] = stored_value_; + GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_, + stored_trailing_zeros_); + if (stored_value_ == ending) { + has_next_ = false; + } + } + + done: + // Sync bit-reader state back + buffer_ = r.cur_byte; + bits_left_ = r.bits; + in.wrapped_buf_advance_read_pos(r.pos); + return common::E_OK; + } + + int batch_skip_raw(int count, int& skipped, T ending, + common::ByteStream& in) { + if (!in.is_wrapped()) { + return batch_skip_fallback(count, skipped, ending, in); + } + + const uint8_t* base = + (const uint8_t*)in.get_wrapped_buf() + in.read_pos(); + uint32_t remain = in.remaining_size(); + + GorillaBitReader r; + r.data = base; + r.pos = 0; + r.data_len = remain; + r.bits = bits_left_; 
+ r.cur_byte = buffer_; + + skipped = 0; + + if (UNLIKELY(!first_value_was_read_)) { + if (r.bits == 0 && r.pos >= r.data_len) goto done; + r.load_byte_if_empty(); + stored_value_ = (T)r.read_long(GorillaRawOps::VALUE_BITS); + first_value_was_read_ = true; + GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_, + stored_trailing_zeros_); + if (stored_value_ == ending) { + has_next_ = false; + } else { + has_next_ = true; + } + // The first value counts as one skip + skipped++; + if (!has_next_ || skipped >= count) goto done; + } + + while (skipped < count && has_next_) { + skipped++; + GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_, + stored_trailing_zeros_); + if (stored_value_ == ending) { + has_next_ = false; + } + } + + done: + buffer_ = r.cur_byte; + bits_left_ = r.bits; + in.wrapped_buf_advance_read_pos(r.pos); + return common::E_OK; + } + + int batch_decode_fallback(T* out, int capacity, int& actual, T ending, + common::ByteStream& in) { + actual = 0; + while (actual < capacity && has_remaining(in)) { + out[actual++] = decode(in); + } + return common::E_OK; + } + + int batch_skip_fallback(int count, int& skipped, T ending, + common::ByteStream& in) { + skipped = 0; + while (skipped < count && has_remaining(in)) { + decode(in); + skipped++; + } + return common::E_OK; + } + public: common::TSEncoding type_; T stored_value_; @@ -254,18 +536,18 @@ FORCE_INLINE int64_t GorillaDecoder::decode(common::ByteStream& in) { class FloatGorillaDecoder : public GorillaDecoder { public: - int read_boolean(bool& ret_value, common::ByteStream& in); - int read_int32(int32_t& ret_value, common::ByteStream& in); - int read_int64(int64_t& ret_value, common::ByteStream& in); - int read_float(float& ret_value, common::ByteStream& in); - int read_double(double& ret_value, common::ByteStream& in); + int read_boolean(bool& ret_value, common::ByteStream& in) override; + int read_int32(int32_t& ret_value, common::ByteStream& in) override; + int 
read_int64(int64_t& ret_value, common::ByteStream& in) override; + int read_float(float& ret_value, common::ByteStream& in) override; + int read_double(double& ret_value, common::ByteStream& in) override; float decode(common::ByteStream& in) { int32_t value_int = GorillaDecoder::decode(in); return common::int_to_float(value_int); } - int32_t cache_next(common::ByteStream& in) { + int32_t cache_next(common::ByteStream& in) override { read_next(in); if (stored_value_ == common::float_to_int(GORILLA_ENCODING_ENDING_FLOAT)) { @@ -273,22 +555,46 @@ class FloatGorillaDecoder : public GorillaDecoder { } return stored_value_; } + + int read_batch_float(float* out, int capacity, int& actual, + common::ByteStream& in) override { + int32_t ending = common::float_to_int(GORILLA_ENCODING_ENDING_FLOAT); + actual = 0; + while (actual < capacity && has_remaining(in)) { + int32_t buf[129]; + int batch = std::min(129, capacity - actual); + int buf_actual = 0; + int ret = batch_decode_raw(buf, batch, buf_actual, ending, in); + if (ret != common::E_OK) return ret; + if (buf_actual == 0) break; + for (int i = 0; i < buf_actual; i++) { + out[actual + i] = common::int_to_float(buf[i]); + } + actual += buf_actual; + } + return common::E_OK; + } + + int skip_float(int count, int& skipped, common::ByteStream& in) override { + int32_t ending = common::float_to_int(GORILLA_ENCODING_ENDING_FLOAT); + return batch_skip_raw(count, skipped, ending, in); + } }; class DoubleGorillaDecoder : public GorillaDecoder { public: - int read_boolean(bool& ret_value, common::ByteStream& in); - int read_int32(int32_t& ret_value, common::ByteStream& in); - int read_int64(int64_t& ret_value, common::ByteStream& in); - int read_float(float& ret_value, common::ByteStream& in); - int read_double(double& ret_value, common::ByteStream& in); + int read_boolean(bool& ret_value, common::ByteStream& in) override; + int read_int32(int32_t& ret_value, common::ByteStream& in) override; + int read_int64(int64_t& ret_value, 
common::ByteStream& in) override; + int read_float(float& ret_value, common::ByteStream& in) override; + int read_double(double& ret_value, common::ByteStream& in) override; double decode(common::ByteStream& in) { int64_t value_long = GorillaDecoder::decode(in); return common::long_to_double(value_long); } - int64_t cache_next(common::ByteStream& in) { + int64_t cache_next(common::ByteStream& in) override { read_next(in); if (stored_value_ == common::double_to_long(GORILLA_ENCODING_ENDING_DOUBLE)) { @@ -296,12 +602,88 @@ class DoubleGorillaDecoder : public GorillaDecoder { } return stored_value_; } + + int read_batch_double(double* out, int capacity, int& actual, + common::ByteStream& in) override { + int64_t ending = common::double_to_long(GORILLA_ENCODING_ENDING_DOUBLE); + actual = 0; + while (actual < capacity && has_remaining(in)) { + int64_t buf[129]; + int batch = std::min(129, capacity - actual); + int buf_actual = 0; + int ret = batch_decode_raw(buf, batch, buf_actual, ending, in); + if (ret != common::E_OK) return ret; + if (buf_actual == 0) break; + for (int i = 0; i < buf_actual; i++) { + out[actual + i] = common::long_to_double(buf[i]); + } + actual += buf_actual; + } + return common::E_OK; + } + + int skip_double(int count, int& skipped, common::ByteStream& in) override { + int64_t ending = common::double_to_long(GORILLA_ENCODING_ENDING_DOUBLE); + return batch_skip_raw(count, skipped, ending, in); + } }; typedef GorillaDecoder IntGorillaDecoder; typedef GorillaDecoder LongGorillaDecoder; -// wrap as Decoder interface +// ── IntGorillaDecoder batch/skip overrides ───────────────────────────────── +template <> +inline int GorillaDecoder::read_batch_int32(int32_t* out, int capacity, + int& actual, + common::ByteStream& in) { + return batch_decode_raw(out, capacity, actual, + GORILLA_ENCODING_ENDING_INTEGER, in); +} +template <> +inline int GorillaDecoder::read_batch_int64(int64_t*, int, int& actual, + common::ByteStream&) { + actual = 0; + return 
common::E_NOT_SUPPORT; +} +template <> +inline int GorillaDecoder::skip_int32(int count, int& skipped, + common::ByteStream& in) { + return batch_skip_raw(count, skipped, GORILLA_ENCODING_ENDING_INTEGER, in); +} +template <> +inline int GorillaDecoder::skip_int64(int, int& skipped, + common::ByteStream&) { + skipped = 0; + return common::E_NOT_SUPPORT; +} + +// ── LongGorillaDecoder batch/skip overrides ─────────────────────────────── +template <> +inline int GorillaDecoder::read_batch_int32(int32_t*, int, int& actual, + common::ByteStream&) { + actual = 0; + return common::E_NOT_SUPPORT; +} +template <> +inline int GorillaDecoder::read_batch_int64(int64_t* out, int capacity, + int& actual, + common::ByteStream& in) { + return batch_decode_raw(out, capacity, actual, GORILLA_ENCODING_ENDING_LONG, + in); +} +template <> +inline int GorillaDecoder::skip_int32(int, int& skipped, + common::ByteStream&) { + skipped = 0; + return common::E_NOT_SUPPORT; +} +template <> +inline int GorillaDecoder::skip_int64(int count, int& skipped, + common::ByteStream& in) { + return batch_skip_raw(count, skipped, GORILLA_ENCODING_ENDING_LONG, in); +} + +// ── Scalar Decoder interface wrappers (unchanged) ───────────────────────── template <> FORCE_INLINE int IntGorillaDecoder::read_boolean(bool& ret_value, common::ByteStream& in) { diff --git a/cpp/src/encoding/int32_rle_decoder.h b/cpp/src/encoding/int32_rle_decoder.h index ef7b6f095..aee9048a1 100644 --- a/cpp/src/encoding/int32_rle_decoder.h +++ b/cpp/src/encoding/int32_rle_decoder.h @@ -37,8 +37,6 @@ class Int32RleDecoder : public Decoder { int bitpacking_num_; bool is_length_and_bitwidth_readed_; int current_count_; - bool is_rle_run_; - int32_t rle_value_; common::ByteStream byte_cache_{common::MOD_DECODER_OBJ}; int32_t* current_buffer_; Int32Packer* packer_; @@ -51,8 +49,6 @@ class Int32RleDecoder : public Decoder { bitpacking_num_(0), is_length_and_bitwidth_readed_(false), current_count_(0), - is_rle_run_(false), - rle_value_(0), 
byte_cache_(1024, common::MOD_DECODER_OBJ), current_buffer_(nullptr), packer_(nullptr), @@ -64,14 +60,13 @@ class Int32RleDecoder : public Decoder { } int read_boolean(bool& ret_value, common::ByteStream& in) override { int32_t bool_value; - int ret = read_int32(bool_value, in); - if (ret == common::E_OK) { - ret_value = bool_value != 0; - } - return ret; + read_int32(bool_value, in); + ret_value = bool_value == 0 ? false : true; + return common::E_OK; } int read_int32(int32_t& ret_value, common::ByteStream& in) override { - return read_int(ret_value, in); + ret_value = static_cast(read_int(in)); + return common::E_OK; } int read_int64(int64_t& ret_value, common::ByteStream& in) override { return common::E_TYPE_NOT_MATCH; @@ -94,8 +89,6 @@ class Int32RleDecoder : public Decoder { bit_width_ = 0; bitpacking_num_ = 0; current_count_ = 0; - is_rle_run_ = false; - rle_value_ = 0; } bool has_next(common::ByteStream& buffer) { @@ -110,69 +103,30 @@ class Int32RleDecoder : public Decoder { return current_count_ > 0 || byte_cache_.remaining_size() > 0; } - int read_int(int32_t& result, common::ByteStream& buffer) { - int ret = common::E_OK; + int32_t read_int(common::ByteStream& buffer) { if (!is_length_and_bitwidth_readed_) { // start to reader a new rle+bit-packing pattern - if (RET_FAIL(read_length_and_bitwidth(buffer))) { - return ret; - } + read_length_and_bitwidth(buffer); } if (current_count_ == 0) { - // The header is encoded as an unsigned varint where: - // low bit = 0 => RLE run: header_value >> 1 is the run - // count low bit = 1 => bit-packing: header_value >> 1 is the - // group count - uint32_t header_value = 0; - if (RET_FAIL(common::SerializationUtil::read_var_uint( - header_value, byte_cache_))) { + uint8_t header; + int ret = common::E_OK; + if (RET_FAIL( + common::SerializationUtil::read_ui8(header, byte_cache_))) { return ret; } - if (header_value & 1) { - if (RET_FAIL(call_read_bit_packing_buffer(header_value))) { - return ret; - } - } else { - if 
(RET_FAIL(call_read_rle_run(header_value))) { - return ret; - } - } + call_read_bit_packing_buffer(header); } --current_count_; - result = is_rle_run_ - ? rle_value_ - : current_buffer_[bitpacking_num_ - current_count_ - 1]; + int32_t result = current_buffer_[bitpacking_num_ - current_count_ - 1]; if (!has_next_package()) { is_length_and_bitwidth_readed_ = false; } - return ret; + return result; } - int call_read_rle_run(uint32_t header_value) { - int ret = common::E_OK; - int run_length = (int)(header_value >> 1); - if (run_length <= 0) { - return common::E_DECODE_ERR; - } - int byte_width = (bit_width_ + 7) / 8; - // Read the repeated value (stored as byte_width bytes, little-endian) - int32_t value = 0; - for (int i = 0; i < byte_width; i++) { - uint8_t b; - if (RET_FAIL(common::SerializationUtil::read_ui8(b, byte_cache_))) { - return ret; - } - value |= ((int32_t)b) << (i * 8); - } - rle_value_ = value; - is_rle_run_ = true; - current_count_ = run_length; - bitpacking_num_ = run_length; - return ret; - } - - int call_read_bit_packing_buffer(uint32_t header_value) { - int bit_packed_group_count = (int)(header_value >> 1); + int call_read_bit_packing_buffer(uint8_t header) { + int bit_packed_group_count = (int)(header >> 1); // in last bit-packing group, there may be some useless value, // lastBitPackedNum indicates how many values is useful uint8_t last_bit_packed_num; @@ -185,7 +139,6 @@ class Int32RleDecoder : public Decoder { current_count_ = (bit_packed_group_count - 1) * 8 + last_bit_packed_num; bitpacking_num_ = current_count_; - is_rle_run_ = false; } else { return common::E_DECODE_ERR; } @@ -283,10 +236,8 @@ class Int32RleDecoder : public Decoder { bitpacking_num_ = 0; is_length_and_bitwidth_readed_ = false; current_count_ = 0; - is_rle_run_ = false; - rle_value_ = 0; if (current_buffer_) { - common::mem_free(current_buffer_); + delete[] current_buffer_; current_buffer_ = nullptr; } if (packer_) { diff --git a/cpp/src/encoding/int32_sprintz_decoder.h 
b/cpp/src/encoding/int32_sprintz_decoder.h index a7c92eede..3d15597ee 100644 --- a/cpp/src/encoding/int32_sprintz_decoder.h +++ b/cpp/src/encoding/int32_sprintz_decoder.h @@ -125,9 +125,7 @@ class Int32SprintzDecoder : public SprintzDecoder { decode_size_ = bit_width_ & ~(1 << 7); Int32RleDecoder decoder; for (int i = 0; i < decode_size_; ++i) { - if (RET_FAIL(decoder.read_int(current_buffer_[i], input))) { - return ret; - } + current_buffer_[i] = decoder.read_int(input); } } else { decode_size_ = block_size_ + 1; diff --git a/cpp/src/encoding/int64_rle_decoder.h b/cpp/src/encoding/int64_rle_decoder.h index df8e17838..8010fe0f7 100644 --- a/cpp/src/encoding/int64_rle_decoder.h +++ b/cpp/src/encoding/int64_rle_decoder.h @@ -37,8 +37,6 @@ class Int64RleDecoder : public Decoder { int bitpacking_num_; bool is_length_and_bitwidth_readed_; int current_count_; - bool is_rle_run_; - int64_t rle_value_; common::ByteStream byte_cache_{common::MOD_DECODER_OBJ}; int64_t* current_buffer_; Int64Packer* packer_; @@ -51,8 +49,6 @@ class Int64RleDecoder : public Decoder { bitpacking_num_(0), is_length_and_bitwidth_readed_(false), current_count_(0), - is_rle_run_(false), - rle_value_(0), byte_cache_(1024, common::MOD_DECODER_OBJ), current_buffer_(nullptr), packer_(nullptr), @@ -69,7 +65,8 @@ class Int64RleDecoder : public Decoder { return common::E_TYPE_NOT_MATCH; } int read_int64(int64_t& ret_value, common::ByteStream& in) override { - return read_int(ret_value, in); + ret_value = read_int(in); + return common::E_OK; } int read_float(float& ret_value, common::ByteStream& in) override { return common::E_TYPE_NOT_MATCH; @@ -89,8 +86,6 @@ class Int64RleDecoder : public Decoder { bit_width_ = 0; bitpacking_num_ = 0; current_count_ = 0; - is_rle_run_ = false; - rle_value_ = 0; } bool has_next(common::ByteStream& buffer) { @@ -105,69 +100,30 @@ class Int64RleDecoder : public Decoder { return current_count_ > 0 || byte_cache_.remaining_size() > 0; } - int read_int(int64_t& result, 
common::ByteStream& buffer) { - int ret = common::E_OK; + int64_t read_int(common::ByteStream& buffer) { if (!is_length_and_bitwidth_readed_) { // start to reader a new rle+bit-packing pattern - if (RET_FAIL(read_length_and_bitwidth(buffer))) { - return ret; - } + read_length_and_bitwidth(buffer); } if (current_count_ == 0) { - // The header is encoded as an unsigned varint where: - // low bit = 0 => RLE run: header_value >> 1 is the run - // count low bit = 1 => bit-packing: header_value >> 1 is the - // group count - uint32_t header_value = 0; - if (RET_FAIL(common::SerializationUtil::read_var_uint( - header_value, byte_cache_))) { + uint8_t header; + int ret = common::E_OK; + if (RET_FAIL( + common::SerializationUtil::read_ui8(header, byte_cache_))) { return ret; } - if (header_value & 1) { - if (RET_FAIL(call_read_bit_packing_buffer(header_value))) { - return ret; - } - } else { - if (RET_FAIL(call_read_rle_run(header_value))) { - return ret; - } - } + call_read_bit_packing_buffer(header); } --current_count_; - result = is_rle_run_ - ? 
rle_value_ - : current_buffer_[bitpacking_num_ - current_count_ - 1]; + int64_t result = current_buffer_[bitpacking_num_ - current_count_ - 1]; if (!has_next_package()) { is_length_and_bitwidth_readed_ = false; } - return ret; + return result; } - int call_read_rle_run(uint32_t header_value) { - int ret = common::E_OK; - int run_length = (int)(header_value >> 1); - if (run_length <= 0) { - return common::E_DECODE_ERR; - } - int byte_width = (bit_width_ + 7) / 8; - // Read the repeated value (stored as byte_width bytes, little-endian) - int64_t value = 0; - for (int i = 0; i < byte_width; i++) { - uint8_t b; - if (RET_FAIL(common::SerializationUtil::read_ui8(b, byte_cache_))) { - return ret; - } - value |= ((int64_t)b) << (i * 8); - } - rle_value_ = value; - is_rle_run_ = true; - current_count_ = run_length; - bitpacking_num_ = run_length; - return ret; - } - - int call_read_bit_packing_buffer(uint32_t header_value) { - int bit_packed_group_count = (int)(header_value >> 1); + int call_read_bit_packing_buffer(uint8_t header) { + int bit_packed_group_count = (int)(header >> 1); // in last bit-packing group, there may be some useless value, // lastBitPackedNum indicates how many values is useful uint8_t last_bit_packed_num; @@ -180,27 +136,25 @@ class Int64RleDecoder : public Decoder { current_count_ = (bit_packed_group_count - 1) * 8 + last_bit_packed_num; bitpacking_num_ = current_count_; - is_rle_run_ = false; } else { - return common::E_DECODE_ERR; + printf( + "tsfile-encoding IntRleDecoder: bit_packed_group_count %d, " + "smaller " + "than 1", + bit_packed_group_count); } - ret = read_bit_packing_buffer(bit_packed_group_count, - last_bit_packed_num); + read_bit_packing_buffer(bit_packed_group_count, last_bit_packed_num); return ret; } - int read_bit_packing_buffer(int bit_packed_group_count, - int last_bit_packed_num) { - int ret = common::E_OK; + void read_bit_packing_buffer(int bit_packed_group_count, + int last_bit_packed_num) { if (current_buffer_ != nullptr) 
{ common::mem_free(current_buffer_); } current_buffer_ = static_cast( common::mem_alloc(sizeof(int64_t) * bit_packed_group_count * 8, common::MOD_DECODER_OBJ)); - if (IS_NULL(current_buffer_)) { - return common::E_OOM; - } int bytes_to_read = bit_packed_group_count * bit_width_; if (bytes_to_read > (int)byte_cache_.remaining_size()) { bytes_to_read = byte_cache_.remaining_size(); @@ -208,17 +162,13 @@ class Int64RleDecoder : public Decoder { std::vector bytes(bytes_to_read); for (int i = 0; i < bytes_to_read; i++) { - if (RET_FAIL(common::SerializationUtil::read_ui8(bytes[i], - byte_cache_))) { - return ret; - } + common::SerializationUtil::read_ui8(bytes[i], byte_cache_); } // save all int values in currentBuffer packer_->unpack_all_values( bytes.data(), bytes_to_read, current_buffer_); // decode from bytes, save in currentBuffer - return ret; } int read_length_and_bitwidth(common::ByteStream& buffer) { @@ -227,9 +177,6 @@ class Int64RleDecoder : public Decoder { common::SerializationUtil::read_var_uint(length_, buffer))) { return common::E_PARTIAL_READ; } else { - if (tmp_buf_) { - common::mem_free(tmp_buf_); - } tmp_buf_ = (uint8_t*)common::mem_alloc(length_, common::MOD_DECODER_OBJ); if (tmp_buf_ == nullptr) { @@ -275,8 +222,6 @@ class Int64RleDecoder : public Decoder { bitpacking_num_ = 0; is_length_and_bitwidth_readed_ = false; current_count_ = 0; - is_rle_run_ = false; - rle_value_ = 0; if (current_buffer_) { common::mem_free(current_buffer_); current_buffer_ = nullptr; diff --git a/cpp/src/encoding/int64_sprintz_decoder.h b/cpp/src/encoding/int64_sprintz_decoder.h index 7b0827688..a7e3fdd27 100644 --- a/cpp/src/encoding/int64_sprintz_decoder.h +++ b/cpp/src/encoding/int64_sprintz_decoder.h @@ -124,9 +124,7 @@ class Int64SprintzDecoder : public SprintzDecoder { decode_size_ = bit_width_ & ~(1 << 7); Int64RleDecoder decoder; for (int i = 0; i < decode_size_; ++i) { - if (RET_FAIL(decoder.read_int(current_buffer_[i], input))) { - return ret; - } + 
current_buffer_[i] = decoder.read_int(input); } } else { decode_size_ = block_size_ + 1; diff --git a/cpp/src/encoding/plain_decoder.h b/cpp/src/encoding/plain_decoder.h index c2627f71d..5a55b551c 100644 --- a/cpp/src/encoding/plain_decoder.h +++ b/cpp/src/encoding/plain_decoder.h @@ -20,6 +20,9 @@ #ifndef ENCODING_PLAIN_DECODER_H #define ENCODING_PLAIN_DECODER_H +#include +#include + #include "encoding/decoder.h" namespace storage { @@ -62,6 +65,128 @@ class PlainDecoder : public Decoder { common::ByteStream& in) override { return common::SerializationUtil::read_mystring(ret_String, &pa, in); } + + // ── Batch overrides ────────────────────────────────────────────────────── + // + // INT32: PLAIN encoding uses varint (variable stride). Override to avoid + // virtual dispatch per element; actual decode is still per-value. + int read_batch_int32(int32_t* out, int capacity, int& actual, + common::ByteStream& in) override { + actual = 0; + while (actual < capacity && in.has_remaining()) { + int ret = common::SerializationUtil::read_var_int(out[actual], in); + if (ret != common::E_OK) return ret; + ++actual; + } + return common::E_OK; + } + + int skip_int32(int count, int& skipped, common::ByteStream& in) override { + skipped = 0; + int32_t dummy; + while (skipped < count && in.has_remaining()) { + int ret = common::SerializationUtil::read_var_int(dummy, in); + if (ret != common::E_OK) return ret; + ++skipped; + } + return common::E_OK; + } + + // INT64: fixed 8-byte big-endian. Direct pointer access for wrapped + // ByteStream, __builtin_bswap64 for byte-swap (single REV on ARM64). 
+    // Bulk-decode PLAIN INT64 values: each value occupies 8 bytes, big-endian.
+    //
+    // Decodes min(capacity, in.remaining_size() / 8) values into `out`, then
+    // moves the stream forward by 8 bytes per decoded value through
+    // wrapped_buf_advance_read_pos().
+    //
+    // @param out       destination array, at least `capacity` elements
+    // @param capacity  upper bound on values to decode; <= 0 decodes nothing
+    // @param actual    receives the number of values written to `out`
+    // @param in        source stream
+    // @return common::E_OK; a short read is reported only through `actual`
+    //
+    // NOTE(review): bytes are read via get_wrapped_buf() + read_pos(), so this
+    // presumably requires `in` to wrap one contiguous buffer -- confirm that
+    // every caller passes such a stream.
+    // NOTE(review): the unconditional __builtin_bswap64 produces host order
+    // only on a little-endian host.
+    int read_batch_int64(int64_t* out, int capacity, int& actual,
+                         common::ByteStream& in) override {
+        actual = 0;
+        // A negative capacity would wrap to a huge unsigned bound in the
+        // min() below and let the copy loop overrun `out`.
+        if (capacity <= 0) return common::E_OK;
+
+        const int n = static_cast<int>(std::min<uint64_t>(
+            in.remaining_size() / 8, static_cast<uint64_t>(capacity)));
+        if (n <= 0) return common::E_OK;
+
+        // Capture the source pointer before advancing the read position.
+        const uint8_t* src =
+            (const uint8_t*)in.get_wrapped_buf() + in.read_pos();
+        in.wrapped_buf_advance_read_pos(static_cast<uint32_t>(n) * 8);
+        actual = n;
+        for (int i = 0; i < n; ++i) {
+            uint64_t v;
+            // memcpy keeps the load alignment-safe; src has no alignment
+            // guarantee.
+            memcpy(&v, src + i * 8, 8);
+            out[i] = static_cast<int64_t>(__builtin_bswap64(v));
+        }
+        return common::E_OK;
+    }
+
+    // Skip up to `count` INT64 values (8 bytes each) without decoding them.
+    // `skipped` receives the number of whole values skipped, which is
+    // min(count, in.remaining_size() / 8); count <= 0 skips nothing.
+    int skip_int64(int count, int& skipped, common::ByteStream& in) override {
+        skipped = 0;
+        // Reject negative counts before the unsigned cast (see
+        // read_batch_int64): they would otherwise skip the whole stream.
+        if (count <= 0) return common::E_OK;
+        skipped = static_cast<int>(std::min<uint64_t>(
+            in.remaining_size() / 8, static_cast<uint64_t>(count)));
+        if (skipped > 0) {
+            in.wrapped_buf_advance_read_pos(
+                static_cast<uint32_t>(skipped) * 8);
+        }
+        return common::E_OK;
+    }
+
+    // Skip up to `count` FLOAT values (4 bytes each) without decoding them.
+    // `skipped` receives min(count, in.remaining_size() / 4); count <= 0
+    // skips nothing.
+    int skip_float(int count, int& skipped, common::ByteStream& in) override {
+        skipped = 0;
+        if (count <= 0) return common::E_OK;
+        skipped = static_cast<int>(std::min<uint64_t>(
+            in.remaining_size() / 4, static_cast<uint64_t>(count)));
+        if (skipped > 0) {
+            in.wrapped_buf_advance_read_pos(
+                static_cast<uint32_t>(skipped) * 4);
+        }
+        return common::E_OK;
+    }
+
+    // Skip up to `count` DOUBLE values (8 bytes each) without decoding them.
+    // `skipped` receives min(count, in.remaining_size() / 8); count <= 0
+    // skips nothing.
+    int skip_double(int count, int& skipped, common::ByteStream& in) override {
+        skipped = 0;
+        if (count <= 0) return common::E_OK;
+        skipped = static_cast<int>(std::min<uint64_t>(
+            in.remaining_size() / 8, static_cast<uint64_t>(count)));
+        if (skipped > 0) {
+            in.wrapped_buf_advance_read_pos(
+                static_cast<uint32_t>(skipped) * 8);
+        }
+        return common::E_OK;
+    }
+
+    // FLOAT: fixed 4-byte big-endian IEEE 754.
+ int read_batch_float(float* out, int capacity, int& actual, + common::ByteStream& in) override { + actual = 0; + int n = static_cast(std::min( + in.remaining_size() / 4, static_cast(capacity))); + if (n <= 0) return common::E_OK; + + const uint8_t* src = + (const uint8_t*)in.get_wrapped_buf() + in.read_pos(); + in.wrapped_buf_advance_read_pos(static_cast(n) * 4); + actual = n; + for (int i = 0; i < n; ++i) { + uint32_t v; + memcpy(&v, src + i * 4, 4); + v = __builtin_bswap32(v); + memcpy(&out[i], &v, 4); + } + return common::E_OK; + } + + // DOUBLE: fixed 8-byte big-endian IEEE 754. + int read_batch_double(double* out, int capacity, int& actual, + common::ByteStream& in) override { + actual = 0; + int n = static_cast(std::min( + in.remaining_size() / 8, static_cast(capacity))); + if (n <= 0) return common::E_OK; + + const uint8_t* src = + (const uint8_t*)in.get_wrapped_buf() + in.read_pos(); + in.wrapped_buf_advance_read_pos(static_cast(n) * 8); + actual = n; + for (int i = 0; i < n; ++i) { + uint64_t v; + memcpy(&v, src + i * 8, 8); + v = __builtin_bswap64(v); + memcpy(&out[i], &v, 8); + } + return common::E_OK; + } }; } // end namespace storage diff --git a/cpp/src/encoding/plain_encoder.h b/cpp/src/encoding/plain_encoder.h index b768c9bf0..fd52e36d4 100644 --- a/cpp/src/encoding/plain_encoder.h +++ b/cpp/src/encoding/plain_encoder.h @@ -20,50 +20,180 @@ #ifndef ENCODING_PLAIN_ENCODER_H #define ENCODING_PLAIN_ENCODER_H +#include + #include "encoder.h" +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#include +#define TSFILE_HAS_NEON 1 +#endif + namespace storage { class PlainEncoder : public Encoder { public: PlainEncoder() {} ~PlainEncoder() { destroy(); } - void destroy() { /* do nothing for PlainEncoder */ + void destroy() override { /* do nothing for PlainEncoder */ } - void reset() { /* do thing for PlainEncoder */ + void reset() override { /* do thing for PlainEncoder */ } - FORCE_INLINE int encode(bool value, common::ByteStream& out_stream) { + 
FORCE_INLINE int encode(bool value, + common::ByteStream& out_stream) override { return common::SerializationUtil::write_i8(value ? 1 : 0, out_stream); } - FORCE_INLINE int encode(int32_t value, common::ByteStream& out_stream) { + FORCE_INLINE int encode(int32_t value, + common::ByteStream& out_stream) override { return common::SerializationUtil::write_var_int(value, out_stream); } - FORCE_INLINE int encode(int64_t value, common::ByteStream& out_stream) { + FORCE_INLINE int encode(int64_t value, + common::ByteStream& out_stream) override { return common::SerializationUtil::write_i64(value, out_stream); } - FORCE_INLINE int encode(float value, common::ByteStream& out_stream) { + FORCE_INLINE int encode(float value, + common::ByteStream& out_stream) override { return common::SerializationUtil::write_float(value, out_stream); } - FORCE_INLINE int encode(double value, common::ByteStream& out_stream) { + FORCE_INLINE int encode(double value, + common::ByteStream& out_stream) override { return common::SerializationUtil::write_double(value, out_stream); } FORCE_INLINE int encode(common::String value, - common::ByteStream& out_stream) { + common::ByteStream& out_stream) override { return common::SerializationUtil::write_mystring(value, out_stream); } - int flush(common::ByteStream& out_stream) { + int flush(common::ByteStream& out_stream) override { // do nothing for PlainEncoder return common::E_OK; } - int get_max_byte_size() { return 0; } + int get_max_byte_size() override { return 0; } + + // Optimized batch encoding: directly byte-swap into ByteStream page buffer. + // Avoids per-value write_buf overhead entirely — only calls acquire_buf() + // once per page boundary crossing. 
+ int encode_batch(const int64_t* values, uint32_t count, + common::ByteStream& out_stream) override { + if (count == 0) return common::E_OK; + uint32_t offset = 0; + while (offset < count) { + common::ByteStream::Buffer buf = out_stream.acquire_buf(); + if (UNLIKELY(buf.buf_ == nullptr)) return common::E_OOM; + // How many int64 values fit in the remaining page space? + uint32_t capacity = buf.len_ / 8; + if (capacity == 0) { + // Page has < 8 bytes left, fall back to write_buf for this one + return Encoder::encode_batch(values + offset, count - offset, + out_stream); + } + uint32_t batch = std::min(count - offset, capacity); + uint8_t* dst = (uint8_t*)buf.buf_; + const int64_t* src = values + offset; + uint32_t i = 0; +#if TSFILE_HAS_NEON + // NEON: byte-reverse 2 x int64 per iteration + for (; i + 2 <= batch; i += 2) { + uint8x16_t v = vld1q_u8((const uint8_t*)&src[i]); + v = vrev64q_u8(v); + vst1q_u8(dst, v); + dst += 16; + } +#endif + // Scalar tail + for (; i < batch; i++) { + uint64_t v = (uint64_t)src[i]; + dst[0] = (uint8_t)(v >> 56); + dst[1] = (uint8_t)(v >> 48); + dst[2] = (uint8_t)(v >> 40); + dst[3] = (uint8_t)(v >> 32); + dst[4] = (uint8_t)(v >> 24); + dst[5] = (uint8_t)(v >> 16); + dst[6] = (uint8_t)(v >> 8); + dst[7] = (uint8_t)(v); + dst += 8; + } + out_stream.buffer_used(batch * 8); + offset += batch; + } + return common::E_OK; + } + + int encode_batch(const double* values, uint32_t count, + common::ByteStream& out_stream) override { + return encode_batch(reinterpret_cast(values), count, + out_stream); + } + + int encode_batch(const float* values, uint32_t count, + common::ByteStream& out_stream) override { + if (count == 0) return common::E_OK; + uint32_t offset = 0; + while (offset < count) { + common::ByteStream::Buffer buf = out_stream.acquire_buf(); + if (UNLIKELY(buf.buf_ == nullptr)) return common::E_OOM; + uint32_t capacity = buf.len_ / 4; + if (capacity == 0) { + return Encoder::encode_batch(values + offset, count - offset, + 
out_stream); + } + uint32_t batch = std::min(count - offset, capacity); + uint8_t* dst = (uint8_t*)buf.buf_; + const float* src = values + offset; + uint32_t i = 0; +#if TSFILE_HAS_NEON + // NEON: byte-reverse 4 x float (32-bit) per iteration + for (; i + 4 <= batch; i += 4) { + uint8x16_t v = vld1q_u8((const uint8_t*)&src[i]); + v = vrev32q_u8(v); + vst1q_u8(dst, v); + dst += 16; + } +#endif + for (; i < batch; i++) { + uint32_t v; + memcpy(&v, &src[i], sizeof(float)); + dst[0] = (uint8_t)(v >> 24); + dst[1] = (uint8_t)(v >> 16); + dst[2] = (uint8_t)(v >> 8); + dst[3] = (uint8_t)(v); + dst += 4; + } + out_stream.buffer_used(batch * 4); + offset += batch; + } + return common::E_OK; + } + + // Batch encode strings from Arrow-style offset+buffer layout. + // Each string is serialized as: var_int(len) + raw bytes. + int encode_string_batch(const char* buffer, const uint32_t* offsets, + uint32_t start_idx, uint32_t count, + common::ByteStream& out_stream) override { + int ret = common::E_OK; + for (uint32_t i = 0; i < count; i++) { + uint32_t idx = start_idx + i; + uint32_t len = offsets[idx + 1] - offsets[idx]; + if (RET_FAIL(common::SerializationUtil::write_var_int( + (int32_t)len, out_stream))) { + return ret; + } + if (len > 0) { + if (RET_FAIL( + out_stream.write_buf(buffer + offsets[idx], len))) { + return ret; + } + } + } + return ret; + } }; } // end namespace storage diff --git a/cpp/src/encoding/ts2diff_decoder.h b/cpp/src/encoding/ts2diff_decoder.h index 32584546d..d0a217982 100644 --- a/cpp/src/encoding/ts2diff_decoder.h +++ b/cpp/src/encoding/ts2diff_decoder.h @@ -23,13 +23,185 @@ #include #include +#include #include "common/allocator/alloc_base.h" #include "common/allocator/byte_stream.h" #include "decoder.h" #include "utils/util_define.h" +#ifdef ENABLE_SIMD +#include "simde/x86/avx2.h" +#endif + namespace storage { + +// ============================================================================ +// SIMD batch decode helpers (INT32) +// 
============================================================================ +#ifdef ENABLE_SIMD + +// Decode 4 INT32 values from bit-packed data using SIMD gather + shift. +// @in: pointer to the start of packed bit data for the block +// @bit_width: bits per delta value +// @delta_min: minimum delta offset for this block +// @index: current position within the block (0-based, among write_index_ +// deltas) +// @base: the previous reconstructed value (for prefix-sum) +// @out: output array (4 values written) +// Returns: the last reconstructed value (new base for next group) +static inline int32_t simd_decode_4_i32(const uint8_t* in, int32_t bit_width, + int32_t delta_min, int32_t index, + int32_t base, int32_t out[4]) { + static const simde__m128i SHUF_REV4 = simde_mm_setr_epi8( + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + + const simde__m128i VMIN4 = simde_mm_set1_epi32(delta_min); + + int32_t pos0 = index * bit_width; + int32_t pos[4] = {pos0, pos0 + bit_width, pos0 + 2 * bit_width, + pos0 + 3 * bit_width}; + int32_t bidx[4] = {pos[0] >> 3, pos[1] >> 3, pos[2] >> 3, pos[3] >> 3}; + int32_t off[4] = {pos[0] & 7, pos[1] & 7, pos[2] & 7, pos[3] & 7}; + + simde__m128i IDX = simde_mm_setr_epi32(bidx[0], bidx[1], bidx[2], bidx[3]); + simde__m128i OFF = simde_mm_setr_epi32(off[0], off[1], off[2], off[3]); + + simde__m128i V4; + + if (bit_width <= 16) { + int rshift = 32 - bit_width; + simde__m128i w32_le = simde_mm_i32gather_epi32((const int*)in, IDX, 1); + simde__m128i w32_be = simde_mm_shuffle_epi8(w32_le, SHUF_REV4); + simde__m128i U32 = simde_mm_sllv_epi32(w32_be, OFF); + simde__m128i RS32 = simde_mm_set1_epi32(rshift); + V4 = simde_mm_srlv_epi32(U32, RS32); + } else { + static const simde__m256i SHUF_REV8 = simde_mm256_setr_epi8( + 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, + 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + int rshift = 64 - bit_width; + simde__m256i w64_le = + simde_mm256_i32gather_epi64((const int64_t*)in, IDX, 
1); + simde__m256i w64_be = simde_mm256_shuffle_epi8(w64_le, SHUF_REV8); + simde__m256i OFF64 = simde_mm256_cvtepu32_epi64(OFF); + simde__m256i U64 = simde_mm256_sllv_epi64(w64_be, OFF64); + simde__m256i V64 = + simde_mm256_srl_epi64(U64, simde_mm_cvtsi32_si128(rshift)); + simde__m256i perm = simde_mm256_setr_epi32(0, 2, 4, 6, 0, 0, 0, 0); + simde__m256i comp = simde_mm256_permutevar8x32_epi32(V64, perm); + V4 = simde_mm256_castsi256_si128(comp); + } + + // Add delta_min + V4 = simde_mm_add_epi32(V4, VMIN4); + + // Prefix sum to reconstruct absolute values + simde__m128i t; + t = simde_mm_slli_si128(V4, 4); + V4 = simde_mm_add_epi32(V4, t); + t = simde_mm_slli_si128(V4, 8); + V4 = simde_mm_add_epi32(V4, t); + + // Add base + simde__m128i C4 = simde_mm_set1_epi32(base); + V4 = simde_mm_add_epi32(V4, C4); + + simde_mm_storeu_si128((simde__m128i*)out, V4); + return out[3]; +} + +// Decode 4 INT64 values from bit-packed data using SIMD. +static inline int64_t simd_decode_4_i64(const uint8_t* in, int32_t bit_width, + int64_t delta_min, int32_t index, + int64_t base, int64_t out[4]) { + static const simde__m256i SHUF_REV8 = simde_mm256_setr_epi8( + 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + + const simde__m256i VMIN4 = simde_mm256_set1_epi64x(delta_min); + + int32_t pos0 = index * bit_width; + int32_t pos[4] = {pos0, pos0 + bit_width, pos0 + 2 * bit_width, + pos0 + 3 * bit_width}; + int32_t bidx[4] = {pos[0] >> 3, pos[1] >> 3, pos[2] >> 3, pos[3] >> 3}; + int32_t off[4] = {pos[0] & 7, pos[1] & 7, pos[2] & 7, pos[3] & 7}; + + simde__m128i IDX = simde_mm_setr_epi32(bidx[0], bidx[1], bidx[2], bidx[3]); + + int rshift = 64 - bit_width; + simde__m256i w64_le = + simde_mm256_i32gather_epi64((const int64_t*)in, IDX, 1); + simde__m256i w64_be = simde_mm256_shuffle_epi8(w64_le, SHUF_REV8); + simde__m256i OFF64 = simde_mm256_cvtepu32_epi64( + simde_mm_setr_epi32(off[0], off[1], off[2], off[3])); + simde__m256i 
U64 = simde_mm256_sllv_epi64(w64_be, OFF64); + simde__m256i V64 = + simde_mm256_srl_epi64(U64, simde_mm_cvtsi32_si128(rshift)); + + // Add delta_min + V64 = simde_mm256_add_epi64(V64, VMIN4); + + // Prefix sum (64-bit, 4 lanes) + simde__m256i t; + // shift by 8 bytes = 1 lane + t = simde_mm256_slli_si256(V64, 8); + V64 = simde_mm256_add_epi64(V64, t); + // cross-lane: add lane[1] to lane[2] and lane[3] + // Extract high 128 bits, add broadcast of element[1] to both elements + int64_t tmp_buf[4]; + simde_mm256_storeu_si256((simde__m256i*)tmp_buf, V64); + tmp_buf[2] += tmp_buf[1]; + tmp_buf[3] += tmp_buf[1]; + V64 = simde_mm256_loadu_si256((const simde__m256i*)tmp_buf); + + // Add base + simde__m256i C4 = simde_mm256_set1_epi64x(base); + V64 = simde_mm256_add_epi64(V64, C4); + + simde_mm256_storeu_si256((simde__m256i*)out, V64); + return out[3]; +} + +#endif // ENABLE_SIMD + +// ============================================================================ +// Scalar batch decode helpers +// ============================================================================ + +// Scalar: extract one value from bit-packed data. 
+// @data: pointer to packed bits (NOT advanced; caller handles position) +// @bit_pos: bit offset from start of data +// @bit_width: bits per value +static inline int64_t scalar_read_bits(const uint8_t* data, int32_t bit_pos, + int32_t bit_width) { + int64_t value = 0; + int bits = bit_width; + int byte_idx = bit_pos >> 3; + int bit_offset = bit_pos & 7; + int bits_avail = 8 - bit_offset; + + while (bits > 0) { + if (bits >= bits_avail) { + uint8_t d = data[byte_idx] & ((1 << bits_avail) - 1); + value = (value << bits_avail) | d; + bits -= bits_avail; + byte_idx++; + bits_avail = 8; + } else { + uint8_t d = + (data[byte_idx] >> (bits_avail - bits)) & ((1 << bits) - 1); + value = (value << bits) | d; + bits = 0; + } + } + return value; +} + +// ============================================================================ +// TS2DIFFDecoder template +// ============================================================================ + template class TS2DIFFDecoder : public Decoder { public: @@ -46,12 +218,14 @@ class TS2DIFFDecoder : public Decoder { previous_value_ = 0; bit_width_ = 0; current_index_ = 0; + header_peeked_ = false; } FORCE_INLINE bool has_remaining(const common::ByteStream& buffer) override { if (buffer.has_remaining()) return true; - return bits_left_ != 0 || (current_index_ <= write_index_ && - write_index_ != -1 && current_index_ != 0); + return header_peeked_ || bits_left_ != 0 || + (current_index_ <= write_index_ && write_index_ != -1 && + current_index_ != 0); } void read_header(common::ByteStream& in) { @@ -106,6 +280,18 @@ class TS2DIFFDecoder : public Decoder { int read_String(common::String& ret_value, common::PageArena& pa, common::ByteStream& in) override; + int read_batch_int32(int32_t* out, int capacity, int& actual, + common::ByteStream& in) override; + int read_batch_int64(int64_t* out, int capacity, int& actual, + common::ByteStream& in) override; + int skip_int32(int count, int& skipped, common::ByteStream& in) override; + int 
skip_int64(int count, int& skipped, common::ByteStream& in) override; + + bool peek_next_block_range_int64(common::ByteStream& in, int64_t& block_min, + int64_t& block_max, + int& block_count) override; + int skip_peeked_block_int64(common::ByteStream& in, int& skipped) override; + public: T first_value_; T previous_value_; @@ -116,8 +302,13 @@ class TS2DIFFDecoder : public Decoder { int bit_width_; int write_index_; int current_index_; + bool header_peeked_; }; +// ============================================================================ +// Per-value decode (unchanged) +// ============================================================================ + template <> inline int32_t TS2DIFFDecoder::decode(common::ByteStream& in) { int32_t ret_value = stored_value_; @@ -172,6 +363,372 @@ inline int64_t TS2DIFFDecoder::decode(common::ByteStream& in) { return ret_value; } +// ============================================================================ +// Batch decode: INT32 +// Decodes one full block (up to 129 values) per call using SIMD when enabled. +// ============================================================================ + +template <> +inline int TS2DIFFDecoder::read_batch_int32(int32_t* out, int capacity, + int& actual, + common::ByteStream& in) { + actual = 0; + + while (actual < capacity && has_remaining(in)) { + // If we are mid-block (current_index_ != 0), finish it per-value. 
+ if (current_index_ != 0) { + while (actual < capacity && current_index_ != 0 && + has_remaining(in)) { + out[actual++] = decode(in); + } + continue; + } + + // Start of a new block — read header + read_header(in); + common::SerializationUtil::read_i32(delta_min_, in); + common::SerializationUtil::read_i32(first_value_, in); + bits_left_ = 0; + buffer_ = 0; + + // Output first_value + if (actual >= capacity) { + // Must consume first_value next time; set state for per-value path + current_index_ = 0; + // We already consumed the header; push first_value as stored + // and let the next call to decode() handle it. + // Actually, we need to handle this: rewind is not possible. + // So we output first_value and accept going 1 over capacity. + } + out[actual++] = first_value_; + + if (write_index_ == 0) { + // Block has only first_value, no deltas + current_index_ = 0; + continue; + } + + int32_t remaining = write_index_; + if (actual + remaining > capacity) { + // Block won't fit in output. Fall back to per-value decode. + // Stream is at packed data start; bits_left_/buffer_ are reset. + current_index_ = 1; + continue; + } + + // Full block decode + int32_t block_bytes = (write_index_ * bit_width_ + 7) / 8; + const uint8_t* blk_ptr = + (const uint8_t*)in.get_wrapped_buf() + in.read_pos(); + in.wrapped_buf_advance_read_pos(static_cast(block_bytes)); + + int32_t prev = first_value_; + int32_t i = 0; + +#ifdef ENABLE_SIMD + // SIMD path: decode 8 values at a time (2 groups of 4) + for (; i + 7 < remaining; i += 8) { + int32_t need_bytes = ((i + 7) * bit_width_ + bit_width_ + 7) / 8 + + (bit_width_ > 16 ? 
8 : 4); + if (need_bytes > block_bytes) break; + + int32_t grp_out[8]; + prev = simd_decode_4_i32(blk_ptr, bit_width_, delta_min_, i, prev, + grp_out); + prev = simd_decode_4_i32(blk_ptr, bit_width_, delta_min_, i + 4, + prev, grp_out + 4); + + memcpy(out + actual, grp_out, 8 * sizeof(int32_t)); + actual += 8; + } +#endif + + // Scalar tail + int32_t bit_pos = i * bit_width_; + for (; i < remaining; ++i) { + int64_t delta = scalar_read_bits(blk_ptr, bit_pos, bit_width_); + bit_pos += bit_width_; + int32_t val = (int32_t)delta + prev + delta_min_; + prev = val; + out[actual++] = val; + } + + // Block done, reset state + first_value_ = prev; + current_index_ = 0; + } + + return common::E_OK; +} + +// ============================================================================ +// Batch decode: INT64 +// ============================================================================ + +template <> +inline int TS2DIFFDecoder::read_batch_int64(int64_t* out, int capacity, + int& actual, + common::ByteStream& in) { + actual = 0; + + while (actual < capacity && has_remaining(in)) { + // If mid-block, finish per-value + if (current_index_ != 0) { + while (actual < capacity && current_index_ != 0 && + has_remaining(in)) { + out[actual++] = decode(in); + } + continue; + } + + // Start of a new block + if (!header_peeked_) { + read_header(in); + common::SerializationUtil::read_i64(delta_min_, in); + common::SerializationUtil::read_i64(first_value_, in); + bits_left_ = 0; + buffer_ = 0; + } + header_peeked_ = false; + + out[actual++] = first_value_; + + if (write_index_ == 0) { + current_index_ = 0; + continue; + } + + int32_t remaining = write_index_; + if (actual + remaining > capacity) { + // Block won't fit in output. Fall back to per-value decode. + // Stream is at packed data start; bits_left_/buffer_ are reset. + current_index_ = 1; + continue; + } + + int32_t block_bytes = (write_index_ * bit_width_ + 7) / 8; + // Direct pointer into the wrapped ByteStream buffer. 
+ const uint8_t* blk_ptr = + (const uint8_t*)in.get_wrapped_buf() + in.read_pos(); + in.wrapped_buf_advance_read_pos(static_cast(block_bytes)); + + int64_t prev = first_value_; + int32_t i = 0; + +#ifdef ENABLE_SIMD + // SIMD path: decode 4 INT64 values at a time + for (; i + 3 < remaining; i += 4) { + int32_t need_bytes = + ((i + 3) * bit_width_ + bit_width_ + 7) / 8 + 8; + if (need_bytes > block_bytes) break; + + int64_t grp_out[4]; + prev = simd_decode_4_i64(blk_ptr, bit_width_, delta_min_, i, prev, + grp_out); + memcpy(out + actual, grp_out, 4 * sizeof(int64_t)); + actual += 4; + } +#endif + + // Scalar tail + int32_t bit_pos = i * bit_width_; + for (; i < remaining; ++i) { + int64_t delta = scalar_read_bits(blk_ptr, bit_pos, bit_width_); + bit_pos += bit_width_; + int64_t val = delta + prev + delta_min_; + prev = val; + out[actual++] = val; + } + + first_value_ = prev; + current_index_ = 0; + } + + return common::E_OK; +} + +// ============================================================================ +// Skip: INT32 — read header only, jump over packed data +// ============================================================================ + +template <> +inline int TS2DIFFDecoder::skip_int32(int count, int& skipped, + common::ByteStream& in) { + skipped = 0; + + // If mid-block, finish current block per-value + while (skipped < count && current_index_ != 0 && has_remaining(in)) { + decode(in); + ++skipped; + } + + // Skip whole blocks + while (skipped < count && has_remaining(in)) { + int32_t wi, bw, dm, fv; + common::SerializationUtil::read_i32(wi, in); + common::SerializationUtil::read_i32(bw, in); + common::SerializationUtil::read_i32(dm, in); + common::SerializationUtil::read_i32(fv, in); + + int32_t block_vals = wi + 1; + int32_t skip_bytes = (wi * bw + 7) / 8; + in.wrapped_buf_advance_read_pos(skip_bytes); + + skipped += block_vals; + // Reset decoder state + bits_left_ = 0; + buffer_ = 0; + current_index_ = 0; + write_index_ = -1; + } + + return 
common::E_OK; +} + +// ============================================================================ +// Skip: INT64 +// ============================================================================ + +template <> +inline int TS2DIFFDecoder::skip_int64(int count, int& skipped, + common::ByteStream& in) { + skipped = 0; + + while (skipped < count && current_index_ != 0 && has_remaining(in)) { + decode(in); + ++skipped; + } + + while (skipped < count && has_remaining(in)) { + int32_t wi, bw; + int64_t dm, fv; + common::SerializationUtil::read_i32(wi, in); + common::SerializationUtil::read_i32(bw, in); + common::SerializationUtil::read_i64(dm, in); + common::SerializationUtil::read_i64(fv, in); + + int32_t block_vals = wi + 1; + int32_t skip_bytes = (wi * bw + 7) / 8; + in.wrapped_buf_advance_read_pos(skip_bytes); + + skipped += block_vals; + bits_left_ = 0; + buffer_ = 0; + current_index_ = 0; + write_index_ = -1; + } + + return common::E_OK; +} + +// ============================================================================ +// Block-level filter check: peek header and compute value range +// ============================================================================ + +template <> +inline bool TS2DIFFDecoder::peek_next_block_range_int64( + common::ByteStream& in, int64_t& block_min, int64_t& block_max, + int& block_count) { + if (current_index_ != 0 || !has_remaining(in)) return false; + + read_header(in); + common::SerializationUtil::read_i64(delta_min_, in); + common::SerializationUtil::read_i64(first_value_, in); + bits_left_ = 0; + buffer_ = 0; + + block_min = first_value_; + block_count = write_index_ + 1; + + // Look-ahead: since timestamps are monotonically increasing, the true + // block_max is the last timestamp, which equals next block's first_value_. + // The next block header starts at read_pos + packed_bytes. first_value_ is + // at offset 16 within the header + // (write_index_(4)+bit_width_(4)+delta_min_(8)). 
We read it via raw pointer + // so the stream position is not consumed. + int32_t packed_bytes = (write_index_ * bit_width_ + 7) / 8; + if (in.remaining_size() >= (uint32_t)packed_bytes + 24) { + char* next_fv_ptr = + in.get_wrapped_buf() + in.read_pos() + packed_bytes + 16; + block_max = (int64_t)common::SerializationUtil::read_ui64(next_fv_ptr); + } else { + // Last block in page: fall back to conservative estimate. + if (write_index_ == 0 || bit_width_ == 0) { + block_max = first_value_ + (int64_t)write_index_ * delta_min_; + } else if (bit_width_ >= 63) { + block_max = INT64_MAX; + } else { + int64_t max_delta = delta_min_ + ((1LL << bit_width_) - 1); + block_max = first_value_ + (int64_t)write_index_ * max_delta; + } + } + + header_peeked_ = true; + return true; +} + +template <> +inline int TS2DIFFDecoder::skip_peeked_block_int64( + common::ByteStream& in, int& skipped) { + skipped = write_index_ + 1; + int32_t skip_bytes = (write_index_ * bit_width_ + 7) / 8; + in.wrapped_buf_advance_read_pos(skip_bytes); + header_peeked_ = false; + bits_left_ = 0; + buffer_ = 0; + current_index_ = 0; + write_index_ = -1; + return common::E_OK; +} + +// INT32 specialization: not applicable (timestamps are always INT64) +template <> +inline bool TS2DIFFDecoder::peek_next_block_range_int64( + common::ByteStream& in, int64_t& block_min, int64_t& block_max, + int& block_count) { + return false; +} + +template <> +inline int TS2DIFFDecoder::skip_peeked_block_int64( + common::ByteStream& in, int& skipped) { + return common::E_NOT_SUPPORT; +} + +// ============================================================================ +// Default (unsupported type) batch/skip — fall back to base class +// ============================================================================ + +template <> +inline int TS2DIFFDecoder::read_batch_int64(int64_t* out, int capacity, + int& actual, + common::ByteStream& in) { + return Decoder::read_batch_int64(out, capacity, actual, in); +} + +template <> 
+inline int TS2DIFFDecoder::skip_int64(int count, int& skipped, + common::ByteStream& in) { + return Decoder::skip_int64(count, skipped, in); +} + +template <> +inline int TS2DIFFDecoder::read_batch_int32(int32_t* out, int capacity, + int& actual, + common::ByteStream& in) { + return Decoder::read_batch_int32(out, capacity, actual, in); +} + +template <> +inline int TS2DIFFDecoder::skip_int32(int count, int& skipped, + common::ByteStream& in) { + return Decoder::skip_int32(count, skipped, in); +} + +// ============================================================================ +// Float / Double wrapper decoders (unchanged) +// ============================================================================ + class FloatTS2DIFFDecoder : public TS2DIFFDecoder { public: float decode(common::ByteStream& in) { @@ -179,11 +736,24 @@ class FloatTS2DIFFDecoder : public TS2DIFFDecoder { return common::int_to_float(value_int); } - int read_boolean(bool& ret_value, common::ByteStream& in); - int read_int32(int32_t& ret_value, common::ByteStream& in); - int read_int64(int64_t& ret_value, common::ByteStream& in); - int read_float(float& ret_value, common::ByteStream& in); - int read_double(double& ret_value, common::ByteStream& in); + int read_boolean(bool& ret_value, common::ByteStream& in) override; + int read_int32(int32_t& ret_value, common::ByteStream& in) override; + int read_int64(int64_t& ret_value, common::ByteStream& in) override; + int read_float(float& ret_value, common::ByteStream& in) override; + int read_double(double& ret_value, common::ByteStream& in) override; + + int read_batch_float(float* out, int capacity, int& actual, + common::ByteStream& in) override { + // Reuse SIMD batch decode for int32, then bit-cast to float + int32_t* buf = reinterpret_cast(out); + int ret = TS2DIFFDecoder::read_batch_int32(buf, capacity, + actual, in); + if (ret != common::E_OK) return ret; + for (int i = 0; i < actual; ++i) { + out[i] = common::int_to_float(buf[i]); + } + return 
common::E_OK; + } }; class DoubleTS2DIFFDecoder : public TS2DIFFDecoder { @@ -193,11 +763,24 @@ class DoubleTS2DIFFDecoder : public TS2DIFFDecoder { return common::long_to_double(value_long); } - int read_boolean(bool& ret_value, common::ByteStream& in); - int read_int32(int32_t& ret_value, common::ByteStream& in); - int read_int64(int64_t& ret_value, common::ByteStream& in); - int read_float(float& ret_value, common::ByteStream& in); - int read_double(double& ret_value, common::ByteStream& in); + int read_boolean(bool& ret_value, common::ByteStream& in) override; + int read_int32(int32_t& ret_value, common::ByteStream& in) override; + int read_int64(int64_t& ret_value, common::ByteStream& in) override; + int read_float(float& ret_value, common::ByteStream& in) override; + int read_double(double& ret_value, common::ByteStream& in) override; + + int read_batch_double(double* out, int capacity, int& actual, + common::ByteStream& in) override { + // Reuse SIMD batch decode for int64, then bit-cast to double + int64_t* buf = reinterpret_cast(out); + int ret = TS2DIFFDecoder::read_batch_int64(buf, capacity, + actual, in); + if (ret != common::E_OK) return ret; + for (int i = 0; i < actual; ++i) { + out[i] = common::long_to_double(buf[i]); + } + return common::E_OK; + } }; typedef TS2DIFFDecoder IntTS2DIFFDecoder; diff --git a/cpp/src/encoding/ts2diff_encoder.h b/cpp/src/encoding/ts2diff_encoder.h index 8c5ddafc7..b2b219b55 100644 --- a/cpp/src/encoding/ts2diff_encoder.h +++ b/cpp/src/encoding/ts2diff_encoder.h @@ -25,12 +25,9 @@ #include "common/allocator/alloc_base.h" #include "common/allocator/byte_stream.h" #include "encoder.h" -#if defined(__SSE4_2__) -#include -#define USE_SSE 1 -#elif defined(__AVX2__) -#include -#define USE_AVX2 1 + +#ifdef ENABLE_SIMD +#include "simde/x86/avx2.h" #endif namespace storage { @@ -40,15 +37,16 @@ struct SIMDOps; template <> struct SIMDOps { -#ifdef USE_SSE +#ifdef ENABLE_SIMD static void rebase(int32_t* arr, int32_t min_val, size_t 
size) { - const __m128i min_vec = _mm_set1_epi32(min_val); + const simde__m128i min_vec = simde_mm_set1_epi32(min_val); size_t i = 0; for (; i + 3 < size; i += 4) { - __m128i vec = - _mm_loadu_si128(reinterpret_cast(arr + i)); - vec = _mm_sub_epi32(vec, min_vec); - _mm_storeu_si128(reinterpret_cast<__m128i*>(arr + i), vec); + simde__m128i vec = simde_mm_loadu_si128( + reinterpret_cast(arr + i)); + vec = simde_mm_sub_epi32(vec, min_vec); + simde_mm_storeu_si128(reinterpret_cast(arr + i), + vec); } for (; i < size; ++i) { arr[i] -= min_val; @@ -65,15 +63,16 @@ struct SIMDOps { template <> struct SIMDOps { -#ifdef USE_AVX2 +#ifdef ENABLE_SIMD static void rebase(int64_t* arr, int64_t min_val, size_t size) { - const __m256i min_vec = _mm256_set1_epi64x(min_val); + const simde__m256i min_vec = simde_mm256_set1_epi64x(min_val); size_t i = 0; for (; i + 3 < size; i += 4) { - __m256i vec = - _mm256_loadu_si256(reinterpret_cast(arr + i)); - vec = _mm256_sub_epi64(vec, min_vec); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(arr + i), vec); + simde__m256i vec = simde_mm256_loadu_si256( + reinterpret_cast(arr + i)); + vec = simde_mm256_sub_epi64(vec, min_vec); + simde_mm256_storeu_si256(reinterpret_cast(arr + i), + vec); } for (; i < size; ++i) { arr[i] -= min_val; @@ -95,7 +94,7 @@ class TS2DIFFEncoder : public Encoder { ~TS2DIFFEncoder() { destroy(); } - void reset() { write_index_ = -1; } + void reset() override { write_index_ = -1; } void init() { block_size_ = 128; @@ -111,7 +110,7 @@ class TS2DIFFEncoder : public Encoder { previous_value_ = 0; } - void destroy() { + void destroy() override { if (delta_arr_ != nullptr) { common::mem_free(delta_arr_); delta_arr_ = nullptr; @@ -163,17 +162,64 @@ class TS2DIFFEncoder : public Encoder { return bit_width; } + // Batch bit-pack `count` values (each `bit_width` bits, MSB-first within + // byte) into a single contiguous buffer and write it to out_stream in one + // call. 
Avoids the per-byte write_buf overhead of the scalar write_bits + // loop. + // + // Returns 0 on success, -1 if bit_width > 56 (accumulator overflow risk; + // caller should fall back to write_bits + flush_remaining). + template + static int pack_bits_msb(const U* values, int count, int bit_width, + common::ByteStream& out_stream) { + if (count <= 0 || bit_width <= 0) return 0; + if (bit_width > 56) return -1; // fall back + + size_t total_bytes = ((size_t)count * (size_t)bit_width + 7) / 8; + std::vector buf(total_bytes, 0); + + uint64_t accum = 0; + int bits_in_accum = 0; + size_t pos = 0; + const uint64_t mask = (1ULL << bit_width) - 1; + + for (int i = 0; i < count; i++) { + uint64_t v = static_cast(values[i]) & mask; + accum = (accum << bit_width) | v; + bits_in_accum += bit_width; + while (bits_in_accum >= 8) { + buf[pos++] = static_cast(accum >> (bits_in_accum - 8)); + bits_in_accum -= 8; + } + if (bits_in_accum > 0) { + accum &= ((1ULL << bits_in_accum) - 1); + } else { + accum = 0; + } + } + if (bits_in_accum > 0) { + buf[pos++] = static_cast(accum << (8 - bits_in_accum)); + } + out_stream.write_buf(buf.data(), pos); + return 0; + } + int do_encode(T value, common::ByteStream& out_stream); - int encode(bool value, common::ByteStream& out_stream); - int encode(int32_t value, common::ByteStream& out_stream); - int encode(int64_t value, common::ByteStream& out_stream); - int encode(float value, common::ByteStream& out_stream); - int encode(double value, common::ByteStream& out_stream); - int encode(common::String value, common::ByteStream& out_stream); + int encode(bool value, common::ByteStream& out_stream) override; + int encode(int32_t value, common::ByteStream& out_stream) override; + int encode(int64_t value, common::ByteStream& out_stream) override; + int encode(float value, common::ByteStream& out_stream) override; + int encode(double value, common::ByteStream& out_stream) override; + int encode(common::String value, common::ByteStream& out_stream) 
override; + + int encode_batch(const int32_t* values, uint32_t count, + common::ByteStream& out_stream) override; + int encode_batch(const int64_t* values, uint32_t count, + common::ByteStream& out_stream) override; - int flush(common::ByteStream& out_stream); + int flush(common::ByteStream& out_stream) override; - int get_max_byte_size() { + int get_max_byte_size() override { // The meaning of 24 is: index(4)+width(4)+minDeltaBase(8)+firstValue(8) return 24 + write_index_ * 8; } @@ -236,11 +282,14 @@ inline int TS2DIFFEncoder::flush(common::ByteStream& out_stream) { common::SerializationUtil::write_ui32(bit_width, out_stream); common::SerializationUtil::write_ui32(delta_arr_min_, out_stream); common::SerializationUtil::write_ui32(first_value_, out_stream); - // writer data - for (int i = 0; i < write_index_; i++) { - write_bits(delta_arr_[i], bit_width, out_stream); + // writer data — batched bit-pack + single write_buf for the common case; + // fall back to per-bit path for the rare wide bit_width. + if (pack_bits_msb(delta_arr_, write_index_, bit_width, out_stream) != 0) { + for (int i = 0; i < write_index_; i++) { + write_bits(delta_arr_[i], bit_width, out_stream); + } + flush_remaining(out_stream); } - flush_remaining(out_stream); reset(); return ret; } @@ -260,15 +309,202 @@ inline int TS2DIFFEncoder::flush(common::ByteStream& out_stream) { common::SerializationUtil::write_i32(bit_width, out_stream); common::SerializationUtil::write_i64(delta_arr_min_, out_stream); common::SerializationUtil::write_i64(first_value_, out_stream); - // writer data - for (int i = 0; i < write_index_; i++) { - write_bits(delta_arr_[i], bit_width, out_stream); + // writer data — batched bit-pack + single write_buf for the common case; + // fall back to per-bit path for the rare wide bit_width (>56). 
+ if (pack_bits_msb(delta_arr_, write_index_, bit_width, out_stream) != 0) { + for (int i = 0; i < write_index_; i++) { + write_bits(delta_arr_[i], bit_width, out_stream); + } + flush_remaining(out_stream); } - flush_remaining(out_stream); reset(); // 语义,writeIndex=-1; return ret; } +// ============================================================================ +// Batch encode: INT32 +// Adjacent-difference removes sequential dependency; SIMD for delta + min/max. +// ============================================================================ + +template <> +inline int TS2DIFFEncoder::encode_batch( + const int32_t* values, uint32_t count, common::ByteStream& out_stream) { + int ret = common::E_OK; + uint32_t offset = 0; + + while (offset < count) { + // Start of new block: store first_value + if (write_index_ == -1) { + first_value_ = values[offset]; + previous_value_ = first_value_; + write_index_ = 0; + offset++; + continue; + } + + // How many deltas fit in current block + uint32_t space = static_cast(block_size_) - write_index_; + uint32_t batch = std::min(count - offset, space); + + // ── Adjacent difference: delta[i] = values[i] - values[i-1] ── + // First delta uses previous_value_ + delta_arr_[write_index_] = values[offset] - previous_value_; + + uint32_t i = 1; +#ifdef ENABLE_SIMD + // SIMD: 4 adjacent differences at a time + for (; i + 3 < batch; i += 4) { + simde__m128i cur = simde_mm_loadu_si128( + reinterpret_cast(values + offset + i)); + simde__m128i prv = simde_mm_loadu_si128( + reinterpret_cast(values + offset + i - 1)); + simde__m128i diff = simde_mm_sub_epi32(cur, prv); + simde_mm_storeu_si128( + reinterpret_cast(delta_arr_ + write_index_ + i), + diff); + } +#endif + for (; i < batch; i++) { + delta_arr_[write_index_ + i] = + values[offset + i] - values[offset + i - 1]; + } + previous_value_ = values[offset + batch - 1]; + + // ── Min/max of new deltas ── + int32_t local_min = delta_arr_[write_index_]; + int32_t local_max = 
delta_arr_[write_index_]; + + uint32_t j = 1; +#ifdef ENABLE_SIMD + if (batch >= 5) { + simde__m128i vmin = simde_mm_set1_epi32(local_min); + simde__m128i vmax = vmin; + for (; j + 3 < batch; j += 4) { + simde__m128i v = + simde_mm_loadu_si128(reinterpret_cast( + delta_arr_ + write_index_ + j)); + vmin = simde_mm_min_epi32(vmin, v); + vmax = simde_mm_max_epi32(vmax, v); + } + // Horizontal reduce + int32_t tmp[4]; + simde_mm_storeu_si128(reinterpret_cast(tmp), vmin); + for (int k = 0; k < 4; k++) + if (tmp[k] < local_min) local_min = tmp[k]; + simde_mm_storeu_si128(reinterpret_cast(tmp), vmax); + for (int k = 0; k < 4; k++) + if (tmp[k] > local_max) local_max = tmp[k]; + } +#endif + for (; j < batch; j++) { + int32_t d = delta_arr_[write_index_ + j]; + if (d < local_min) local_min = d; + if (d > local_max) local_max = d; + } + + // Merge with block min/max + if (write_index_ == 0) { + delta_arr_min_ = local_min; + delta_arr_max_ = local_max; + } else { + if (local_min < delta_arr_min_) delta_arr_min_ = local_min; + if (local_max > delta_arr_max_) delta_arr_max_ = local_max; + } + + write_index_ += batch; + offset += batch; + + if (write_index_ >= block_size_) { + if (RET_FAIL(flush(out_stream))) return ret; + } + } + return ret; +} + +// ============================================================================ +// Batch encode: INT64 +// ============================================================================ + +template <> +inline int TS2DIFFEncoder::encode_batch( + const int64_t* values, uint32_t count, common::ByteStream& out_stream) { + int ret = common::E_OK; + uint32_t offset = 0; + + while (offset < count) { + if (write_index_ == -1) { + first_value_ = values[offset]; + previous_value_ = first_value_; + write_index_ = 0; + offset++; + continue; + } + + uint32_t space = static_cast(block_size_) - write_index_; + uint32_t batch = std::min(count - offset, space); + + // Adjacent difference + delta_arr_[write_index_] = values[offset] - previous_value_; + 
+ uint32_t i = 1; +#ifdef ENABLE_SIMD + // SIMD: 2 adjacent differences at a time (128-bit, native NEON) + for (; i + 1 < batch; i += 2) { + simde__m128i cur = simde_mm_loadu_si128( + reinterpret_cast(values + offset + i)); + simde__m128i prv = simde_mm_loadu_si128( + reinterpret_cast(values + offset + i - 1)); + simde__m128i diff = simde_mm_sub_epi64(cur, prv); + simde_mm_storeu_si128( + reinterpret_cast(delta_arr_ + write_index_ + i), + diff); + } +#endif + for (; i < batch; i++) { + delta_arr_[write_index_ + i] = + values[offset + i] - values[offset + i - 1]; + } + previous_value_ = values[offset + batch - 1]; + + // Min/max (scalar — no efficient 64-bit SIMD min/max before AVX-512) + int64_t local_min = delta_arr_[write_index_]; + int64_t local_max = delta_arr_[write_index_]; + for (uint32_t j = 1; j < batch; j++) { + int64_t d = delta_arr_[write_index_ + j]; + if (d < local_min) local_min = d; + if (d > local_max) local_max = d; + } + + if (write_index_ == 0) { + delta_arr_min_ = local_min; + delta_arr_max_ = local_max; + } else { + if (local_min < delta_arr_min_) delta_arr_min_ = local_min; + if (local_max > delta_arr_max_) delta_arr_max_ = local_max; + } + + write_index_ += batch; + offset += batch; + + if (write_index_ >= block_size_) { + if (RET_FAIL(flush(out_stream))) return ret; + } + } + return ret; +} + +// Default: unsupported types fall back to base class loop +template +int TS2DIFFEncoder::encode_batch(const int32_t* values, uint32_t count, + common::ByteStream& out) { + return Encoder::encode_batch(values, count, out); +} +template +int TS2DIFFEncoder::encode_batch(const int64_t* values, uint32_t count, + common::ByteStream& out) { + return Encoder::encode_batch(values, count, out); +} + class FloatTS2DIFFEncoder : public TS2DIFFEncoder { public: int do_encode(float value, common::ByteStream& out_stream) { diff --git a/cpp/src/file/read_file.cc b/cpp/src/file/read_file.cc index dd1c42dad..1807883a8 100644 --- a/cpp/src/file/read_file.cc +++ 
b/cpp/src/file/read_file.cc @@ -21,18 +21,18 @@ #include #include +#include + +#include "common/logger/elog.h" +#include "common/tsfile_common.h" + #ifdef _WIN32 #include #include + ssize_t pread(int fd, void* buf, size_t count, uint64_t offset); -#else -#include #endif -#include "common/logger/elog.h" -#include "common/tsfile_common.h" -#include "utils/util_define.h" // ssize_t and other platform-compat shims - using namespace common; namespace storage { diff --git a/cpp/src/file/restorable_tsfile_io_writer.cc b/cpp/src/file/restorable_tsfile_io_writer.cc index 22a3fb500..d98cdff65 100644 --- a/cpp/src/file/restorable_tsfile_io_writer.cc +++ b/cpp/src/file/restorable_tsfile_io_writer.cc @@ -328,12 +328,8 @@ static int recover_chunk_statistic( uint32_t value_buf_size = 0; std::vector time_decode_buf; const std::vector* times = nullptr; - std::vector aligned_value_notnull_bitmap; - uint32_t aligned_num_values = 0; - const bool is_aligned_value_chunk = - (time_batch != nullptr && !time_batch->empty()); - if (is_aligned_value_chunk) { + if (time_batch != nullptr && !time_batch->empty()) { // Aligned value page: uncompressed layout = uint32(num_values) + bitmap // + value_buf if (uncompressed_size < 4) { @@ -341,7 +337,7 @@ static int recover_chunk_statistic( CompressorFactory::free(compressor); return E_OK; } - aligned_num_values = + uint32_t num_values = (static_cast( static_cast(uncompressed_buf[0])) << 24) | @@ -353,17 +349,12 @@ static int recover_chunk_statistic( << 8) | (static_cast( static_cast(uncompressed_buf[3]))); - uint32_t bitmap_size = (aligned_num_values + 7) / 8; + uint32_t bitmap_size = (num_values + 7) / 8; if (uncompressed_size < 4 + bitmap_size) { compressor->after_uncompress(uncompressed_buf); CompressorFactory::free(compressor); return E_OK; } - aligned_value_notnull_bitmap.resize(bitmap_size); - if (bitmap_size > 0) { - std::memcpy(aligned_value_notnull_bitmap.data(), - uncompressed_buf + 4, bitmap_size); - } value_buf = uncompressed_buf + 4 + 
bitmap_size; value_buf_size = uncompressed_size - 4 - bitmap_size; times = time_batch; @@ -419,25 +410,8 @@ static int recover_chunk_statistic( value_decoder->reset(); size_t idx = 0; const size_t num_times = times->size(); - while (idx < num_times) { + while (idx < num_times && value_decoder->has_remaining(value_in)) { int64_t t = (*times)[idx]; - bool has_value = true; - if (is_aligned_value_chunk) { - has_value = false; - const uint32_t byte_idx = static_cast(idx / 8); - const uint32_t bit_shift = static_cast(idx % 8); - if (byte_idx < aligned_value_notnull_bitmap.size()) { - has_value = ((aligned_value_notnull_bitmap[byte_idx] & 0xFF) & - (0x80 >> bit_shift)) != 0; - } - } - if (!has_value) { - idx++; - continue; - } - if (!value_decoder->has_remaining(value_in)) { - break; - } switch (chdr.data_type_) { case common::BOOLEAN: { bool v; @@ -518,7 +492,6 @@ void RestorableTsFileIOWriter::close() { write_file_ = nullptr; write_file_owned_ = false; } - TsFileIOWriter::destroy(); for (ChunkGroupMeta* cgm : self_check_recovered_cgm_) { cgm->device_id_.reset(); } @@ -842,12 +815,9 @@ int RestorableTsFileIOWriter::self_check(bool truncate_corrupted) { } } - // --- Attach recovered ChunkGroupMeta to writer; record per-CGM prefix - // length so destroy() can free stats appended later. 
--- - recovery_chunk_meta_prefix_.clear(); + // --- Attach recovered ChunkGroupMeta to writer; destroy() will not free + // them --- for (ChunkGroupMeta* cgm : recovered_cgm_list) { - recovery_chunk_meta_prefix_[cgm] = - static_cast(cgm->chunk_meta_list_.size()); push_chunk_group_meta(cgm); } chunk_group_meta_from_recovery_ = true; diff --git a/cpp/src/file/tsfile_io_reader.cc b/cpp/src/file/tsfile_io_reader.cc index e96008a47..03ba7af1f 100644 --- a/cpp/src/file/tsfile_io_reader.cc +++ b/cpp/src/file/tsfile_io_reader.cc @@ -51,6 +51,8 @@ void TsFileIOReader::reset() { } read_file_ = nullptr; tsfile_meta_page_arena_.destroy(); + device_node_cache_.clear(); + device_node_cache_pa_.destroy(); tsfile_meta_ready_ = false; } } @@ -61,6 +63,9 @@ int TsFileIOReader::alloc_ssi(std::shared_ptr device_id, common::PageArena& pa, Filter* time_filter) { int ret = E_OK; if (RET_FAIL(load_tsfile_meta_if_necessary())) { + } else if (!bloom_filter_contains(device_id->get_device_name(), + measurement_name)) { + return E_NO_MORE_DATA; } else { ssi = new TsFileSeriesScanIterator; ssi->init(device_id, measurement_name, read_file_, time_filter, pa); @@ -80,6 +85,95 @@ int TsFileIOReader::alloc_ssi(std::shared_ptr device_id, return ret; } +int TsFileIOReader::alloc_multi_ssi( + std::shared_ptr device_id, + const std::vector& measurement_names, + TsFileSeriesScanIterator*& ssi, common::PageArena& pa, + Filter* time_filter) { + int ret = E_OK; + if (RET_FAIL(load_tsfile_meta_if_necessary())) return ret; + + ssi = new TsFileSeriesScanIterator; + ssi->init(device_id, measurement_names.empty() ? 
"" : measurement_names[0], + read_file_, time_filter, pa); + + auto& ssi_pa = ssi->timeseries_index_pa_; + + // Use cached device measurement node (avoids repeated file I/O) + CachedDeviceNode* cached = get_cached_device_node(device_id, ssi_pa); + if (cached == nullptr) { + delete ssi; + ssi = nullptr; + return E_NOT_EXIST; + } + auto top_node = cached->top_node; + if (!cached->is_aligned) { + delete ssi; + ssi = nullptr; + return E_NOT_SUPPORT; + } + + // Get time column metadata + TimeseriesIndex* time_ts_idx = nullptr; + if (RET_FAIL(get_time_column_metadata(top_node, time_ts_idx, ssi_pa))) { + delete ssi; + ssi = nullptr; + return ret; + } + + // Create MultiAlignedTimeseriesIndex + void* multi_buf = ssi_pa.alloc(sizeof(MultiAlignedTimeseriesIndex)); + if (IS_NULL(multi_buf)) { + delete ssi; + ssi = nullptr; + return E_OOM; + } + auto* multi_idx = new (multi_buf) MultiAlignedTimeseriesIndex; + multi_idx->time_ts_idx_ = time_ts_idx; + + // Load each measurement's TimeseriesIndex + for (const auto& meas_name : measurement_names) { + std::shared_ptr meas_entry; + int64_t meas_end_offset = 0; + if (RET_FAIL(load_measurement_index_entry( + meas_name, top_node, meas_entry, meas_end_offset))) { + // Measurement not found — abort multi path + delete ssi; + ssi = nullptr; + return ret; + } + + ITimeseriesIndex* ts_idx = nullptr; + if (RET_FAIL(do_load_timeseries_index( + meas_name, meas_entry->get_offset(), meas_end_offset, ssi_pa, + ts_idx, /*is_aligned=*/true))) { + delete ssi; + ssi = nullptr; + return ret; + } + + auto* aligned_idx = dynamic_cast(ts_idx); + if (aligned_idx && aligned_idx->value_ts_idx_) { + multi_idx->value_ts_idxs_.push_back(aligned_idx->value_ts_idx_); + } else { + delete ssi; + ssi = nullptr; + return E_NOT_EXIST; + } + } + + ssi->itimeseries_index_ = multi_idx; + + // Skip global statistic filter for multi — per-chunk filtering still works. 
+ + if (RET_FAIL(ssi->init_chunk_reader())) { + ssi->destroy(); + delete ssi; + ssi = nullptr; + } + return ret; +} + void TsFileIOReader::revert_ssi(TsFileSeriesScanIterator* ssi) { if (ssi != nullptr) { ssi->destroy(); @@ -96,47 +190,64 @@ int TsFileIOReader::get_device_timeseries_meta_without_chunk_meta( int64_t end_offset; std::vector, int64_t>> meta_index_entry_list; - std::shared_ptr top_node; - bool is_aligned = false; - TimeseriesIndex* time_timeseries_index = nullptr; if (RET_FAIL(load_device_index_entry( std::make_shared(device_id), meta_index_entry, end_offset))) { - } else { - int64_t start_offset = meta_index_entry->get_offset(); - ASSERT(start_offset < end_offset); - const int32_t read_size = end_offset - start_offset; - int32_t ret_read_len = 0; - char* data_buf = (char*)pa.alloc(read_size); - void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); - if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { - return E_OOM; - } - auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); - top_node = std::shared_ptr(top_node_ptr, - MetaIndexNode::self_deleter); - if (RET_FAIL(read_file_->read(start_offset, data_buf, read_size, - ret_read_len))) { - } else if (RET_FAIL(top_node->deserialize_from(data_buf, read_size))) { - } else { - is_aligned = is_aligned_device(top_node); - if (is_aligned) { - if (RET_FAIL(get_time_column_metadata( - top_node, time_timeseries_index, pa))) { - return ret; - } - } + } else if (RET_FAIL(load_all_measurement_index_entry( + meta_index_entry->get_offset(), end_offset, pa, + meta_index_entry_list))) { + } else if (RET_FAIL(do_load_all_timeseries_index(meta_index_entry_list, pa, + timeseries_indexs))) { + } + return ret; +} + +int TsFileIOReader::get_device_timeseries_meta_by_offset( + int64_t start_offset, int64_t end_offset, + std::vector& timeseries_indexs, PageArena& pa) { + int ret = E_OK; + load_tsfile_meta_if_necessary(); + + std::vector, int64_t>> + meta_index_entry_list; + bool is_aligned = false; + TimeseriesIndex* 
time_timeseries_index = nullptr; + + ASSERT(start_offset < end_offset); + const int32_t read_size = end_offset - start_offset; + int32_t ret_read_len = 0; + char* data_buf = (char*)pa.alloc(read_size); + void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { + return E_OOM; + } + auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); + auto top_node = std::shared_ptr(top_node_ptr, + MetaIndexNode::self_deleter); + if (RET_FAIL(read_file_->read(start_offset, data_buf, read_size, + ret_read_len))) { + return ret; + } + if (RET_FAIL(top_node->deserialize_from(data_buf, read_size))) { + return ret; + } + + is_aligned = is_aligned_device(top_node); + if (is_aligned) { + if (RET_FAIL(get_time_column_metadata(top_node, time_timeseries_index, + pa))) { + return ret; } } - if (RET_FAIL(ret)) { + + get_all_leaf(top_node, meta_index_entry_list); + + if (RET_FAIL(do_load_all_timeseries_index(meta_index_entry_list, pa, + timeseries_indexs))) { return ret; } - if (RET_FAIL(load_all_measurement_index_entry( - meta_index_entry->get_offset(), end_offset, pa, - meta_index_entry_list))) { - } else if (RET_FAIL(do_load_all_timeseries_index(meta_index_entry_list, pa, - timeseries_indexs))) { - } else if (is_aligned && time_timeseries_index != nullptr) { + + if (is_aligned && time_timeseries_index != nullptr) { for (size_t i = 0; i < timeseries_indexs.size(); i++) { void* buf = pa.alloc(sizeof(AlignedTimeseriesIndex)); if (IS_NULL(buf)) { @@ -161,6 +272,20 @@ bool TsFileIOReader::filter_stasify(ITimeseriesIndex* ts_index, return time_filter->satisfy(ts_index->get_statistic()); } +bool TsFileIOReader::bloom_filter_contains( + const std::string& device_name, const std::string& measurement_name) { + BloomFilter* bf = tsfile_meta_.bloom_filter_; + if (bf == nullptr || bf->is_empty()) { + return true; // no bloom filter — assume present + } + common::String dev_str, meas_str; + dev_str.buf_ = const_cast(device_name.c_str()); + 
dev_str.len_ = static_cast(device_name.size()); + meas_str.buf_ = const_cast(measurement_name.c_str()); + meas_str.len_ = static_cast(measurement_name.size()); + return bf->contains(dev_str, meas_str); +} + int TsFileIOReader::load_tsfile_meta_if_necessary() { int ret = E_OK; if (!tsfile_meta_ready_) { @@ -259,44 +384,68 @@ int TsFileIOReader::load_tsfile_meta() { return ret; } -int TsFileIOReader::load_timeseries_index_for_ssi( - std::shared_ptr device_id, const std::string& measurement_name, - TsFileSeriesScanIterator*& ssi) { +TsFileIOReader::CachedDeviceNode* TsFileIOReader::get_cached_device_node( + std::shared_ptr device_id, common::PageArena& pa) { + std::string dev_name = device_id->get_device_name(); + auto it = device_node_cache_.find(dev_name); + if (it != device_node_cache_.end()) { + return &it->second; + } + int ret = E_OK; std::shared_ptr device_index_entry; int64_t device_ie_end_offset = 0; - std::shared_ptr measurement_index_entry; - int64_t measurement_ie_end_offset = 0; - // bool is_aligned = false; if (RET_FAIL(load_device_index_entry( std::make_shared(device_id), device_index_entry, device_ie_end_offset))) { - return ret; + return nullptr; } - auto& pa = ssi->timeseries_index_pa_; int64_t start_offset = device_index_entry->get_offset(), end_offset = device_ie_end_offset; ASSERT(start_offset < end_offset); const int32_t read_size = end_offset - start_offset; int32_t ret_read_len = 0; - char* data_buf = (char*)pa.alloc(read_size); - void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + // Allocate from the reader's cache arena so the node outlives any SSI + char* data_buf = (char*)device_node_cache_pa_.alloc(read_size); + void* m_idx_node_buf = device_node_cache_pa_.alloc(sizeof(MetaIndexNode)); if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { - return E_OOM; + return nullptr; } - auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); + auto* top_node_ptr = + new (m_idx_node_buf) MetaIndexNode(&device_node_cache_pa_); auto top_node 
= std::shared_ptr(top_node_ptr, MetaIndexNode::self_deleter); if (RET_FAIL(read_file_->read(start_offset, data_buf, read_size, ret_read_len))) { - return ret; - } else if (RET_FAIL(top_node->deserialize_from(data_buf, read_size))) { - return ret; + return nullptr; + } + if (RET_FAIL(top_node->deserialize_from(data_buf, read_size))) { + return nullptr; } - bool is_aligned = is_aligned_device(top_node); + CachedDeviceNode cached; + cached.top_node = top_node; + cached.is_aligned = is_aligned_device(top_node); + auto insert_result = + device_node_cache_.emplace(std::move(dev_name), cached); + return &insert_result.first->second; +} + +int TsFileIOReader::load_timeseries_index_for_ssi( + std::shared_ptr device_id, const std::string& measurement_name, + TsFileSeriesScanIterator*& ssi) { + int ret = E_OK; + auto& pa = ssi->timeseries_index_pa_; + + CachedDeviceNode* cached = get_cached_device_node(device_id, pa); + if (cached == nullptr) { + return E_NOT_EXIST; + } + auto top_node = cached->top_node; + bool is_aligned = cached->is_aligned; + TimeseriesIndex* timeseries_index = nullptr; if (is_aligned) { if (RET_FAIL( @@ -305,6 +454,8 @@ int TsFileIOReader::load_timeseries_index_for_ssi( } } + std::shared_ptr measurement_index_entry; + int64_t measurement_ie_end_offset = 0; if (RET_FAIL(load_measurement_index_entry(measurement_name, top_node, measurement_index_entry, measurement_ie_end_offset))) { @@ -347,12 +498,15 @@ int TsFileIOReader::load_device_index_entry( } std::string table_name = device_id_comparable->device_id_->get_table_name(); auto it = tsfile_meta_.table_metadata_index_node_map_.find(table_name); - if (it == tsfile_meta_.table_metadata_index_node_map_.end() || - it->second == nullptr) { + if (it == tsfile_meta_.table_metadata_index_node_map_.end()) { return E_DEVICE_NOT_EXIST; } auto index_node = it->second; + if (index_node == nullptr) { + return E_DEVICE_NOT_EXIST; + } if (index_node->node_type_ == LEAF_DEVICE) { + // FIXME ret = 
index_node->binary_search_children( device_name, true, device_index_entry, end_offset); } else { @@ -613,6 +767,9 @@ int TsFileIOReader::search_from_internal_node( bool TsFileIOReader::is_aligned_device( std::shared_ptr measurement_node) { + if (measurement_node->children_.empty()) { + return false; + } auto entry = measurement_node->children_[0]; return entry->get_name().is_null() || entry->get_name().to_std_string() == ""; diff --git a/cpp/src/file/tsfile_io_reader.h b/cpp/src/file/tsfile_io_reader.h index 2f4135e0e..506aa7f47 100644 --- a/cpp/src/file/tsfile_io_reader.h +++ b/cpp/src/file/tsfile_io_reader.h @@ -20,6 +20,7 @@ #ifndef FILE_TSFILE_IO_REAER_H #define FILE_TSFILE_IO_REAER_H +#include #include #include "common/tsblock/tsblock.h" @@ -46,6 +47,7 @@ class TsFileIOReader { tsfile_meta_ready_(false), read_file_created_(false) { tsfile_meta_page_arena_.init(512, common::MOD_TSFILE_READER); + device_node_cache_pa_.init(512, common::MOD_TSFILE_READER); } int init(const std::string& file_path); @@ -59,6 +61,11 @@ class TsFileIOReader { TsFileSeriesScanIterator*& ssi, common::PageArena& pa, Filter* time_filter = nullptr); + int alloc_multi_ssi(std::shared_ptr device_id, + const std::vector& measurement_names, + TsFileSeriesScanIterator*& ssi, common::PageArena& pa, + Filter* time_filter = nullptr); + void revert_ssi(TsFileSeriesScanIterator* ssi); std::string get_file_path() const { return read_file_->file_path(); } @@ -84,6 +91,11 @@ class TsFileIOReader { std::vector& timeseries_indexs, common::PageArena& pa); + int get_device_timeseries_meta_by_offset( + int64_t start_offset, int64_t end_offset, + std::vector& timeseries_indexs, + common::PageArena& pa); + private: FORCE_INLINE int64_t file_size() const { return read_file_->file_size(); } @@ -142,17 +154,31 @@ class TsFileIOReader { bool filter_stasify(ITimeseriesIndex* ts_index, Filter* time_filter); + bool bloom_filter_contains(const std::string& device_name, + const std::string& measurement_name); + int 
get_all_leaf( std::shared_ptr index_node, std::vector, int64_t>>& index_node_entry_list); + struct CachedDeviceNode { + std::shared_ptr top_node; + bool is_aligned; + }; + + CachedDeviceNode* get_cached_device_node( + std::shared_ptr device_id, common::PageArena& pa); + private: ReadFile* read_file_; common::PageArena tsfile_meta_page_arena_; TsFileMeta tsfile_meta_; bool tsfile_meta_ready_; bool read_file_created_; + // Cache: device_name → deserialized measurement MetaIndexNode + common::PageArena device_node_cache_pa_; + std::unordered_map device_node_cache_; }; } // end namespace storage diff --git a/cpp/src/file/tsfile_io_writer.cc b/cpp/src/file/tsfile_io_writer.cc index 343884448..156d45bb7 100644 --- a/cpp/src/file/tsfile_io_writer.cc +++ b/cpp/src/file/tsfile_io_writer.cc @@ -21,6 +21,8 @@ #include +#include +#include #include #include "common/device_id.h" @@ -40,71 +42,46 @@ namespace storage { #define OFFSET_DEBUG(msg) void(msg) #endif +int64_t TsFileIOWriter::get_meta_size() const { + return meta_allocator_.get_total_used_bytes(); +} + int TsFileIOWriter::init(WriteFile* write_file) { int ret = E_OK; const uint32_t page_size = 1024; meta_allocator_.init(page_size, MOD_TSFILE_WRITER_META); chunk_meta_count_ = 0; - recovery_chunk_meta_prefix_.clear(); - destroyed_ = false; file_ = write_file; return ret; } void TsFileIOWriter::destroy() { - if (destroyed_) { - return; - } - // Recovery attaches a prefix of ChunkGroupMeta; device_id and chunk stats - // in that snapshot live in reader/recovery memory. After open, new chunks - // may be pushed into the same ChunkGroupMeta (same device); only those - // appended ChunkMeta need statistic_->destroy() (see - // recovery_chunk_meta_prefix_). 
- for (auto iter = chunk_group_meta_list_.begin(); - iter != chunk_group_meta_list_.end(); iter++) { - ChunkGroupMeta* cgm = iter.get(); - auto prefix_it = recovery_chunk_meta_prefix_.find(cgm); - const bool is_recovery_cgm = - chunk_group_meta_from_recovery_ && cgm != nullptr && - prefix_it != recovery_chunk_meta_prefix_.end(); - uint32_t recovered_cm_count = is_recovery_cgm ? prefix_it->second : 0; - - if (!is_recovery_cgm) { - if (cgm != nullptr && cgm->device_id_) { - cgm->device_id_.reset(); + // When meta came from RestorableTsFileIOWriter recovery, entries live in + // an arena there; do not release device_id_/statistic_ here. + if (!chunk_group_meta_from_recovery_) { + for (auto iter = chunk_group_meta_list_.begin(); + iter != chunk_group_meta_list_.end(); iter++) { + if (iter.get() && iter.get()->device_id_) { + iter.get()->device_id_.reset(); } - } - - if (cgm == nullptr) { - continue; - } - uint32_t cm_idx = 0; - for (auto chunk_meta = cgm->chunk_meta_list_.begin(); - chunk_meta != cgm->chunk_meta_list_.end(); - chunk_meta++, cm_idx++) { - if (chunk_meta.get() == nullptr || - chunk_meta.get()->statistic_ == nullptr) { - continue; - } - if (is_recovery_cgm && cm_idx < recovered_cm_count) { - continue; + if (iter.get()) { + for (auto chunk_meta = iter.get()->chunk_meta_list_.begin(); + chunk_meta != iter.get()->chunk_meta_list_.end(); + chunk_meta++) { + if (chunk_meta.get()) { + chunk_meta.get()->statistic_->destroy(); + } + } } - chunk_meta.get()->statistic_->destroy(); } } - if (cur_chunk_meta_ != nullptr && cur_chunk_meta_->statistic_ != nullptr) { - cur_chunk_meta_->statistic_->destroy(); - cur_chunk_meta_ = nullptr; - } - meta_allocator_.destroy(); write_stream_.destroy(); if (write_file_created_ && file_ != nullptr) { delete file_; file_ = nullptr; } - destroyed_ = true; } int TsFileIOWriter::start_file() { @@ -130,13 +107,11 @@ int TsFileIOWriter::start_flush_chunk_group( cur_device_name_ = device_name; ASSERT(cur_chunk_group_meta_ == nullptr); 
use_prev_alloc_cgm_ = false; - for (auto iter = chunk_group_meta_list_.begin(); - iter != chunk_group_meta_list_.end(); iter++) { - if (*iter.get()->device_id_ == *cur_device_name_) { - use_prev_alloc_cgm_ = true; - cur_chunk_group_meta_ = iter.get(); - break; - } + // O(1) lookup via hash map instead of O(N) linked-list scan. + auto it = chunk_group_meta_index_.find(device_name->get_device_name()); + if (it != chunk_group_meta_index_.end()) { + use_prev_alloc_cgm_ = true; + cur_chunk_group_meta_ = it->second; } if (!use_prev_alloc_cgm_) { void* buf = meta_allocator_.alloc(sizeof(*cur_chunk_group_meta_)); @@ -258,6 +233,8 @@ int TsFileIOWriter::end_flush_chunk_group(bool is_aligned) { cur_chunk_group_meta_ = nullptr; return common::E_OK; } + chunk_group_meta_index_[cur_device_name_->get_device_name()] = + cur_chunk_group_meta_; int ret = chunk_group_meta_list_.push_back(cur_chunk_group_meta_); cur_chunk_group_meta_ = nullptr; return ret; @@ -269,17 +246,19 @@ int TsFileIOWriter::end_file() { return E_OK; } OFFSET_DEBUG("before end file"); + if (RET_FAIL(write_log_index_range())) { std::cout << "writer range index error, ret =" << ret << std::endl; } else if (RET_FAIL(write_file_index())) { std::cout << "writer file index error, ret = " << ret << std::endl; } else if (RET_FAIL(write_file_footer())) { std::cout << "writer file footer error, ret = " << ret << std::endl; - } else if (RET_FAIL(sync_file())) { + } else if (g_config_value_.sync_on_close_ && RET_FAIL(sync_file())) { std::cout << "sync file error, ret = " << ret << std::endl; } else if (RET_FAIL(close_file())) { std::cout << "close file error, ret = " << ret << std::endl; } + return ret; } diff --git a/cpp/src/file/tsfile_io_writer.h b/cpp/src/file/tsfile_io_writer.h index 088e52f56..b65218f82 100644 --- a/cpp/src/file/tsfile_io_writer.h +++ b/cpp/src/file/tsfile_io_writer.h @@ -21,6 +21,7 @@ #define FILE_TSFILE_IO_WRITER_H #include +#include #include #include "common/allocator/page_arena.h" @@ -108,6 
+109,7 @@ class TsFileIOWriter { FORCE_INLINE std::string get_file_path() { return file_->get_file_path(); } FORCE_INLINE std::shared_ptr get_schema() { return schema_; } + int64_t get_meta_size() const; private: int write_log_index_range(); @@ -191,13 +193,13 @@ class TsFileIOWriter { /** For RestorableTsFileIOWriter: append a recovered ChunkGroupMeta. */ void push_chunk_group_meta(ChunkGroupMeta* cgm) { chunk_group_meta_list_.push_back(cgm); + if (cgm->device_id_) { + chunk_group_meta_index_[cgm->device_id_->get_device_name()] = cgm; + } } - /** True when chunk_group_meta_list_ has a prefix loaded from recovery; - * destroy() must not free device_id_/statistic_ for that prefix only. */ + /** True when chunk_group_meta_list_ entries are from recovery arena; + * destroy() must not free them. */ bool chunk_group_meta_from_recovery_ = false; - /** Recovered ChunkGroupMeta* -> chunk_meta_list_.size() at attach (pointer - * keys avoid idx skew). */ - std::map recovery_chunk_meta_prefix_; /** * Recovery only: set file_base_offset_ so that cur_file_position() returns * correct absolute offsets. After recovery the writer behaves as if the @@ -214,6 +216,9 @@ class TsFileIOWriter { ChunkGroupMeta* cur_chunk_group_meta_; int32_t chunk_meta_count_; // for debug common::SimpleList chunk_group_meta_list_; + // O(1) lookup for existing ChunkGroupMeta by device name, avoiding the + // O(N) linear scan through chunk_group_meta_list_ per device. + std::unordered_map chunk_group_meta_index_; bool use_prev_alloc_cgm_; // chunk group meta std::shared_ptr cur_device_name_; WriteFile* file_; @@ -227,10 +232,6 @@ class TsFileIOWriter { /** Recovery only: absolute file offset at which write_stream_ logically * begins. Normal (non-recovery) path keeps this at 0. */ int64_t file_base_offset_ = 0; - /** Set after destroy() completes; avoids double cleanup when - * RestorableTsFileIOWriter::close() calls destroy() before - * self_check_arena_.destroy(), then ~TsFileIOWriter runs again. 
*/ - bool destroyed_ = false; friend class RestorableTsFileIOWriter; // uses push_chunk_group_meta }; diff --git a/cpp/src/file/write_file.cc b/cpp/src/file/write_file.cc index b6fbd6e44..8ad96fab2 100644 --- a/cpp/src/file/write_file.cc +++ b/cpp/src/file/write_file.cc @@ -24,18 +24,18 @@ #include #include #include -#ifdef _WIN32 -#include -int fsync(int); -#else #include #include -#endif #include "common/config/config.h" #include "common/logger/elog.h" #include "utils/errno_define.h" +#ifdef _WIN32 +#include +int fsync(int); +#endif + using namespace common; namespace storage { diff --git a/cpp/src/reader/aligned_chunk_reader.cc b/cpp/src/reader/aligned_chunk_reader.cc index d79bc7811..59f500b87 100644 --- a/cpp/src/reader/aligned_chunk_reader.cc +++ b/cpp/src/reader/aligned_chunk_reader.cc @@ -19,8 +19,13 @@ #include "aligned_chunk_reader.h" +#include #include +#include "common/global.h" +#ifdef ENABLE_THREADS +#include "common/thread_pool.h" +#endif #include "compress/compressor_factory.h" #include "encoding/decoder_factory.h" @@ -56,19 +61,66 @@ void AlignedChunkReader::reset() { if (file_data_buf != nullptr) { mem_free(file_data_buf); } + time_in_stream_.clear_wrapped_buf(); time_in_stream_.reset(); file_data_buf = value_in_stream_.get_wrapped_buf(); if (file_data_buf != nullptr) { mem_free(file_data_buf); } + value_in_stream_.clear_wrapped_buf(); value_in_stream_.reset(); file_data_time_buf_size_ = 0; file_data_value_buf_size_ = 0; time_chunk_visit_offset_ = 0; value_chunk_visit_offset_ = 0; + page_plan_built_ = false; + current_page_loaded_ = false; + current_page_plan_index_ = 0; + time_predecoded_ = false; + page_all_times_.clear(); + page_time_count_ = 0; + page_time_cursor_ = 0; + + // Free leftover uncompressed buffers from the previous chunk. 
+ if (time_uncompressed_buf_ != nullptr && time_compressor_ != nullptr) { + time_compressor_->after_uncompress(time_uncompressed_buf_); + time_uncompressed_buf_ = nullptr; + } + + // Multi-value reset + for (auto* col : value_columns_) { + // Free uncompressed buffer before resetting. + if (col->uncompressed_buf != nullptr && col->compressor != nullptr) { + col->compressor->after_uncompress(col->uncompressed_buf); + col->uncompressed_buf = nullptr; + } + char* buf = col->in_stream.get_wrapped_buf(); + if (buf != nullptr) mem_free(buf); + col->in_stream.clear_wrapped_buf(); + col->in_stream.reset(); + col->in.reset(); + col->chunk_header.reset(); + col->cur_page_header.reset(); + col->file_data_buf_size = 0; + col->chunk_visit_offset = 0; + col->notnull_bitmap.clear(); + col->cur_value_index = -1; + col->chunk_meta = nullptr; + col->predecoded_values.clear(); + col->predecoded_strings.clear(); + col->predecoded_count = 0; + col->predecoded_read_pos = 0; + col->predecoded = false; + col->predecode_pa.destroy(); + // Note: decoder/compressor are NOT freed here — they are reused by + // alloc_compressor_and_decoder() in load_by_aligned_meta_multi(). 
+ } + release_current_page_state(); + chunk_pages_.clear(); } void AlignedChunkReader::destroy() { + chunk_pages_.clear(); if (time_uncompressed_buf_ != nullptr && time_compressor_ != nullptr) { time_compressor_->after_uncompress(time_uncompressed_buf_); time_uncompressed_buf_ = nullptr; @@ -112,6 +164,34 @@ void AlignedChunkReader::destroy() { } cur_value_page_header_.reset(); chunk_header_.~ChunkHeader(); + + // Multi-value destroy + for (size_t ci = 0; ci < value_columns_.size(); ci++) { + auto* col = value_columns_[ci]; + if (col->decoder != nullptr) { + col->decoder->~Decoder(); + DecoderFactory::free(col->decoder); + col->decoder = nullptr; + } + if (col->compressor != nullptr) { + col->compressor->~Compressor(); + CompressorFactory::free(col->compressor); + col->compressor = nullptr; + } + col->predecode_pa.destroy(); + buf = col->in_stream.get_wrapped_buf(); + if (buf != nullptr) { + mem_free(buf); + col->in_stream.clear_wrapped_buf(); + } + col->cur_page_header.reset(); + delete col; + } + value_columns_.clear(); + release_current_page_state(); +#ifdef ENABLE_THREADS + decode_pool_ = nullptr; // borrowed, not owned +#endif } int AlignedChunkReader::load_by_aligned_meta(ChunkMeta* time_chunk_meta, @@ -218,15 +298,19 @@ int AlignedChunkReader::alloc_compressor_and_decoder( int AlignedChunkReader::get_next_page(TsBlock* ret_tsblock, Filter* oneshoot_filter, PageArena& pa) { + if (multi_value_mode_) { + return get_next_page_multi(ret_tsblock, oneshoot_filter, pa); + } int ret = E_OK; Filter* filter = (oneshoot_filter != nullptr ? 
oneshoot_filter : time_filter_); - if (prev_time_page_not_finish() && prev_value_page_not_finish()) { - ret = decode_time_value_buf_into_tsblock(ret_tsblock, oneshoot_filter, - &pa); + bool pt = prev_time_page_not_finish(); + bool pv = prev_value_page_not_finish(); + if (pt && pv) { + ret = decode_time_value_buf_into_tsblock(ret_tsblock, filter, &pa); return ret; } - if (!prev_time_page_not_finish() && !prev_value_page_not_finish()) { + if (!pt && !pv) { while (IS_SUCC(ret)) { if (RET_FAIL(get_cur_page_header( time_chunk_meta_, time_in_stream_, cur_time_page_header_, @@ -249,8 +333,7 @@ int AlignedChunkReader::get_next_page(TsBlock* ret_tsblock, } } if (IS_SUCC(ret)) { - ret = decode_time_value_buf_into_tsblock(ret_tsblock, oneshoot_filter, - &pa); + ret = decode_time_value_buf_into_tsblock(ret_tsblock, filter, &pa); } return ret; } @@ -259,7 +342,8 @@ int AlignedChunkReader::get_cur_page_header(ChunkMeta*& chunk_meta, common::ByteStream& in_stream, PageHeader& cur_page_header, uint32_t& chunk_visit_offset, - ChunkHeader& chunk_header) { + ChunkHeader& chunk_header, + int32_t* override_buf_size) { int ret = E_OK; bool retry = true; int cur_page_header_serialized_size = 0; @@ -282,7 +366,8 @@ int AlignedChunkReader::get_cur_page_header(ChunkMeta*& chunk_meta, retry = false; retry_read_want_size += 1024; int32_t& file_data_buf_size = - chunk_header.data_type_ == common::VECTOR + override_buf_size != nullptr ? *override_buf_size + : chunk_header.data_type_ == common::VECTOR ? 
file_data_time_buf_size_ : file_data_value_buf_size_; // do not shrink buffer for page header, otherwise, the buffer is @@ -319,16 +404,20 @@ int AlignedChunkReader::read_from_file_and_rewrap( int ret = E_OK; const int DEFAULT_READ_SIZE = 4096; // may use page_size + page_header_size char* file_data_buf = in_stream_.get_wrapped_buf(); - int offset = chunk_meta->offset_of_chunk_header_ + chunk_visit_offset; + int64_t offset = chunk_meta->offset_of_chunk_header_ + chunk_visit_offset; int read_size = (want_size < DEFAULT_READ_SIZE ? DEFAULT_READ_SIZE : want_size); if (file_data_buf_size < read_size || (may_shrink && read_size < file_data_buf_size / 10)) { file_data_buf = (char*)mem_realloc(file_data_buf, read_size); if (IS_NULL(file_data_buf)) { + in_stream_.clear_wrapped_buf(); return E_OOM; } file_data_buf_size = read_size; + // Update stream pointer immediately so it stays valid even if + // the subsequent read fails and the caller frees via destroy(). + in_stream_.wrap_from(file_data_buf, read_size); } int ret_read_len = 0; if (RET_FAIL( @@ -550,19 +639,19 @@ int AlignedChunkReader::decode_time_value_buf_into_tsblock( ((value_page_col_notnull_bitmap_[cur_value_index / 8] & \ 0xFF) & \ (mask >> (cur_value_index % 8))) == 0) { \ - if (UNLIKELY(!row_appender.add_row())) { \ - ret = E_OVERFLOW; \ - cur_value_index--; \ - break; \ - } \ ret = time_decoder_->read_int64(time, time_in); \ if (ret != E_OK) { \ break; \ } \ + if (UNLIKELY(!row_appender.add_row())) { \ + ret = E_OVERFLOW; \ + break; \ + } \ row_appender.append(0, (char*)&time, sizeof(time)); \ row_appender.append_null(1); \ continue; \ } \ + assert(value_decoder_->has_remaining(value_in)); \ if (!value_decoder_->has_remaining(value_in)) { \ return common::E_DATA_INCONSISTENCY; \ } \ @@ -597,19 +686,19 @@ int AlignedChunkReader::i32_DECODE_TYPED_TV_INTO_TSBLOCK( if (value_page_col_notnull_bitmap_.empty() || ((value_page_col_notnull_bitmap_[cur_value_index / 8] & 0xFF) & (mask >> (cur_value_index % 8))) == 0) 
{ - if (UNLIKELY(!row_appender.add_row())) { - ret = E_OVERFLOW; - cur_value_index--; - break; - } ret = time_decoder_->read_int64(time, time_in); if (ret != E_OK) { break; } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } row_appender.append(0, (char*)&time, sizeof(time)); row_appender.append_null(1); continue; } + assert(value_decoder_->has_remaining(value_in)); if (!value_decoder_->has_remaining(value_in)) { return common::E_DATA_INCONSISTENCY; } @@ -632,6 +721,502 @@ int AlignedChunkReader::i32_DECODE_TYPED_TV_INTO_TSBLOCK( return ret; } +int AlignedChunkReader::i32_DECODE_TV_BATCH(ByteStream& time_in, + ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + int32_t values[BATCH]; + const uint32_t null_mask_base = 1 << 7; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + int nonnull = 0; + for (int i = 0; i < block_count; ++i) { + int vi = cur_value_index + 1 + i; + if (!value_page_col_notnull_bitmap_.empty() && + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) != 0) { + ++nonnull; + } + } + cur_value_index += block_count; + if (nonnull > 0) { + int sk = 0; + value_decoder_->skip_int32(nonnull, sk, value_in); + } + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + } 
+ if (time_count == 0) break; + + bool is_null[BATCH]; + int nonnull_count = 0; + for (int i = 0; i < time_count; ++i) { + int vi = cur_value_index + 1 + i; + if (value_page_col_notnull_bitmap_.empty() || + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) == 0) { + is_null[i] = true; + } else { + is_null[i] = false; + ++nonnull_count; + } + } + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + if (nonnull_count > 0) { + int skipped = 0; + value_decoder_->skip_int32(nonnull_count, skipped, value_in); + } + cur_value_index += time_count; + continue; + } + + int value_count = 0; + if (nonnull_count > 0) { + if (RET_FAIL(value_decoder_->read_batch_int32( + values, nonnull_count, value_count, value_in))) { + break; + } + } + + int val_idx = 0; + for (int i = 0; i < time_count; ++i) { + cur_value_index++; + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + if (!is_null[i]) ++val_idx; + continue; + } + if (is_null[i]) { + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append_null(1); + } else { + int32_t val = values[val_idx++]; + if (filter != nullptr && !block_all_pass && + !filter->satisfy(times[i], (int64_t)val)) { + continue; + } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&val, sizeof(int32_t)); + } + } + if (ret != E_OK) break; + } + return ret; +} + +int AlignedChunkReader::i64_DECODE_TV_BATCH(ByteStream& time_in, + ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + int64_t values[BATCH]; + const uint32_t null_mask_base = 1 << 7; + + while
(time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check: skip entire block if out of range + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + int nonnull = 0; + for (int i = 0; i < block_count; ++i) { + int vi = cur_value_index + 1 + i; + if (!value_page_col_notnull_bitmap_.empty() && + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) != 0) { + ++nonnull; + } + } + cur_value_index += block_count; + if (nonnull > 0) { + int sk = 0; + value_decoder_->skip_int64(nonnull, sk, value_in); + } + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + } + if (time_count == 0) break; + + bool is_null[BATCH]; + int nonnull_count = 0; + for (int i = 0; i < time_count; ++i) { + int vi = cur_value_index + 1 + i; + if (value_page_col_notnull_bitmap_.empty() || + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) == 0) { + is_null[i] = true; + } else { + is_null[i] = false; + ++nonnull_count; + } + } + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + if (nonnull_count > 0) { + int skipped = 0; + value_decoder_->skip_int64(nonnull_count, skipped, value_in); + } + cur_value_index += time_count; + continue; + } + + int value_count = 0; + if (nonnull_count > 0) { + if 
(RET_FAIL(value_decoder_->read_batch_int64( + values, nonnull_count, value_count, value_in))) { + break; + } + } + + int val_idx = 0; + for (int i = 0; i < time_count; ++i) { + cur_value_index++; + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + if (!is_null[i]) ++val_idx; + continue; + } + if (is_null[i]) { + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append_null(1); + } else { + int64_t val = values[val_idx++]; + if (filter != nullptr && !block_all_pass && + !filter->satisfy(times[i], val)) { + continue; + } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&val, sizeof(int64_t)); + } + } + if (ret != E_OK) break; + } + return ret; +} + +int AlignedChunkReader::float_DECODE_TV_BATCH(ByteStream& time_in, + ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + float values[BATCH]; + const uint32_t null_mask_base = 1 << 7; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + int nonnull = 0; + for (int i = 0; i < block_count; ++i) { + int vi = cur_value_index + 1 + i; + if (!value_page_col_notnull_bitmap_.empty() && + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) != 0) { + ++nonnull; + } + } + cur_value_index += block_count; + if (nonnull > 0) { + int sk
= 0; + value_decoder_->skip_float(nonnull, sk, value_in); + } + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + } + if (time_count == 0) break; + + bool is_null[BATCH]; + int nonnull_count = 0; + for (int i = 0; i < time_count; ++i) { + int vi = cur_value_index + 1 + i; + if (value_page_col_notnull_bitmap_.empty() || + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) == 0) { + is_null[i] = true; + } else { + is_null[i] = false; + ++nonnull_count; + } + } + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + if (nonnull_count > 0) { + int skipped = 0; + value_decoder_->skip_float(nonnull_count, skipped, value_in); + } + cur_value_index += time_count; + continue; + } + + int value_count = 0; + if (nonnull_count > 0) { + if (RET_FAIL(value_decoder_->read_batch_float( + values, nonnull_count, value_count, value_in))) { + break; + } + } + + int val_idx = 0; + for (int i = 0; i < time_count; ++i) { + cur_value_index++; + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + if (!is_null[i]) ++val_idx; + continue; + } + if (is_null[i]) { + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append_null(1); + } else { + float val = values[val_idx++]; + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&val, sizeof(float)); + } + } + if (ret != E_OK) break; + } + return ret; +} + +int AlignedChunkReader::double_DECODE_TV_BATCH(ByteStream& time_in, + ByteStream& value_in, + RowAppender&
row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + double values[BATCH]; + const uint32_t null_mask_base = 1 << 7; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + int nonnull = 0; + for (int i = 0; i < block_count; ++i) { + int vi = cur_value_index + 1 + i; + if (!value_page_col_notnull_bitmap_.empty() && + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) != 0) { + ++nonnull; + } + } + cur_value_index += block_count; + if (nonnull > 0) { + int sk = 0; + value_decoder_->skip_double(nonnull, sk, value_in); + } + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + } + if (time_count == 0) break; + + bool is_null[BATCH]; + int nonnull_count = 0; + for (int i = 0; i < time_count; ++i) { + int vi = cur_value_index + 1 + i; + if (value_page_col_notnull_bitmap_.empty() || + ((value_page_col_notnull_bitmap_[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) == 0) { + is_null[i] = true; + } else { + is_null[i] = false; + ++nonnull_count; + } + } + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + if (nonnull_count > 0) { + int skipped = 0; + value_decoder_->skip_double(nonnull_count, 
skipped, value_in); + } + cur_value_index += time_count; + continue; + } + + int value_count = 0; + if (nonnull_count > 0) { + if (RET_FAIL(value_decoder_->read_batch_double( + values, nonnull_count, value_count, value_in))) { + break; + } + } + + int val_idx = 0; + for (int i = 0; i < time_count; ++i) { + cur_value_index++; + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + if (!is_null[i]) ++val_idx; + continue; + } + if (is_null[i]) { + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append_null(1); + } else { + double val = values[val_idx++]; + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&val, sizeof(double)); + } + } + if (ret != E_OK) break; + } + return ret; +} + int AlignedChunkReader::decode_tv_buf_into_tsblock_by_datatype( ByteStream& time_in, ByteStream& value_in, TsBlock* ret_tsblock, Filter* filter, common::PageArena* pa) { @@ -644,8 +1229,6 @@ int AlignedChunkReader::decode_tv_buf_into_tsblock_by_datatype( break; case common::DATE: case common::INT32: - // DECODE_TYPED_TV_INTO_TSBLOCK(int32_t, int32, time_in_, value_in_, - // row_appender); ret = i32_DECODE_TYPED_TV_INTO_TSBLOCK(time_in_, value_in_, row_appender, filter); break; @@ -695,6 +1278,7 @@ int AlignedChunkReader::STRING_DECODE_TYPED_TV_INTO_TSBLOCK( } if (should_read_data) { + assert(value_decoder_->has_remaining(value_in)); if (!value_decoder_->has_remaining(value_in)) { return E_DATA_INCONSISTENCY; } @@ -740,21 +1324,15 @@ bool AlignedChunkReader::should_skip_page_by_offset(int& row_offset) { if (row_offset <= 0) { return false; } - // Aligned TV pages: only skip a whole page by count when both page headers - // expose the same positive row count.
Using a single side (or min) when - // the other is missing or unequal can desynchronize row_offset from - // decoded row order vs. the paired time/value stream. - Statistic* ts = cur_time_page_header_.statistic_; - Statistic* vs = cur_value_page_header_.statistic_; - if (ts == nullptr || vs == nullptr) { - return false; + // Use time page statistic for count. + Statistic* stat = cur_time_page_header_.statistic_; + if (stat == nullptr) { + stat = cur_value_page_header_.statistic_; } - int32_t tc = ts->count_; - int32_t vc = vs->count_; - if (tc <= 0 || vc <= 0 || tc != vc) { + if (stat == nullptr || stat->count_ == 0) { return false; } - int32_t count = tc; + int32_t count = stat->count_; if (row_offset >= count) { row_offset -= count; return true; @@ -766,6 +1344,9 @@ int AlignedChunkReader::get_next_page(TsBlock* ret_tsblock, Filter* oneshoot_filter, PageArena& pa, int64_t min_time_hint, int& row_offset, int& row_limit) { + if (multi_value_mode_) { + return get_next_page_multi(ret_tsblock, oneshoot_filter, pa); + } int ret = E_OK; Filter* filter = (oneshoot_filter != nullptr ? 
oneshoot_filter : time_filter_); @@ -774,12 +1355,14 @@ int AlignedChunkReader::get_next_page(TsBlock* ret_tsblock, return E_NO_MORE_DATA; } - if (prev_time_page_not_finish() && prev_value_page_not_finish()) { - ret = decode_time_value_buf_into_tsblock(ret_tsblock, oneshoot_filter, - &pa); + bool pt = prev_time_page_not_finish(); + bool pv = prev_value_page_not_finish(); + + if (pt && pv) { + ret = decode_time_value_buf_into_tsblock(ret_tsblock, filter, &pa); return ret; } - if (!prev_time_page_not_finish() && !prev_value_page_not_finish()) { + if (!pt && !pv) { while (IS_SUCC(ret)) { if (RET_FAIL(get_cur_page_header( time_chunk_meta_, time_in_stream_, cur_time_page_header_, @@ -810,10 +1393,1134 @@ int AlignedChunkReader::get_next_page(TsBlock* ret_tsblock, } } if (IS_SUCC(ret)) { - ret = decode_time_value_buf_into_tsblock(ret_tsblock, oneshoot_filter, - &pa); + ret = decode_time_value_buf_into_tsblock(ret_tsblock, filter, &pa); + } + return ret; +} + +// ══════════════════════════════════════════════════════════════════════════ +// Multi-value AlignedChunkReader implementation +// ══════════════════════════════════════════════════════════════════════════ + +int AlignedChunkReader::load_by_aligned_meta_multi( + ChunkMeta* time_chunk_meta, const std::vector& value_metas) { + int ret = E_OK; + multi_value_mode_ = true; + time_chunk_meta_ = time_chunk_meta; + page_plan_built_ = false; + current_page_loaded_ = false; + current_page_plan_index_ = 0; + time_predecoded_ = false; + page_all_times_.clear(); + page_time_count_ = 0; + page_time_cursor_ = 0; + + // ── Load time chunk header ── + file_data_time_buf_size_ = 1024; + int32_t ret_read_len = 0; + char* time_file_data_buf = + (char*)mem_alloc(file_data_time_buf_size_, MOD_CHUNK_READER); + if (IS_NULL(time_file_data_buf)) return E_OOM; + + ret = read_file_->read(time_chunk_meta_->offset_of_chunk_header_, + time_file_data_buf, file_data_time_buf_size_, + ret_read_len); + if (IS_SUCC(ret) && ret_read_len < 
ChunkHeader::MIN_SERIALIZED_SIZE) { + ret = E_TSFILE_CORRUPTED; + mem_free(time_file_data_buf); + return ret; + } + if (IS_SUCC(ret)) { + time_in_stream_.wrap_from(time_file_data_buf, ret_read_len); + if (RET_FAIL(time_chunk_header_.deserialize_from(time_in_stream_))) { + return ret; + } + time_chunk_visit_offset_ = time_in_stream_.read_pos(); + } + + // Alloc time decoder/compressor + if (IS_SUCC(ret)) { + if (RET_FAIL(alloc_compressor_and_decoder( + time_decoder_, time_compressor_, + time_chunk_header_.encoding_type_, + time_chunk_header_.data_type_, + time_chunk_header_.compression_type_))) { + return ret; + } + } + + // ── Load each value column ── + // Reuse existing ValueColumnState objects if count matches (reset() already + // cleared their internal state). Otherwise, recreate. + if (value_columns_.size() != value_metas.size()) { + for (auto* p : value_columns_) delete p; + value_columns_.clear(); + value_columns_.reserve(value_metas.size()); + for (size_t c = 0; c < value_metas.size(); c++) { + value_columns_.push_back(new ValueColumnState); + } + } + for (size_t c = 0; c < value_metas.size() && IS_SUCC(ret); c++) { + auto* col = value_columns_[c]; + col->chunk_meta = value_metas[c]; + col->file_data_buf_size = 1024; + ret_read_len = 0; + char* vbuf = + (char*)mem_alloc(col->file_data_buf_size, MOD_CHUNK_READER); + if (IS_NULL(vbuf)) return E_OOM; + + ret = read_file_->read(col->chunk_meta->offset_of_chunk_header_, vbuf, + col->file_data_buf_size, ret_read_len); + if (IS_SUCC(ret) && ret_read_len < ChunkHeader::MIN_SERIALIZED_SIZE) { + ret = E_TSFILE_CORRUPTED; + mem_free(vbuf); + break; + } + if (IS_SUCC(ret)) { + col->in_stream.wrap_from(vbuf, ret_read_len); + if (RET_FAIL(col->chunk_header.deserialize_from(col->in_stream))) { + break; + } + col->chunk_visit_offset = col->in_stream.read_pos(); + if (RET_FAIL(alloc_compressor_and_decoder( + col->decoder, col->compressor, + col->chunk_header.encoding_type_, + col->chunk_header.data_type_, + 
col->chunk_header.compression_type_))) { + break; + } + } + } + + return ret; +} + +bool AlignedChunkReader::has_more_data_multi() const { + if (page_plan_built_) { + if (current_page_loaded_) { + return page_time_cursor_ < page_time_count_; + } + return current_page_plan_index_ < chunk_pages_.size(); + } + if (prev_time_page_not_finish() || prev_any_value_page_not_finish_multi()) { + return true; + } + if (time_chunk_visit_offset_ - time_chunk_header_.serialized_size_ < + time_chunk_header_.data_size_) { + return true; + } + for (const auto* col : value_columns_) { + if (col->chunk_visit_offset - col->chunk_header.serialized_size_ < + col->chunk_header.data_size_) { + return true; + } + } + return false; +} + +bool AlignedChunkReader::prev_any_value_page_not_finish_multi() const { + for (const auto* col : value_columns_) { + if ((col->decoder && col->decoder->has_remaining(col->in)) || + col->in.has_remaining()) { + return true; + } + } + return false; +} + +bool AlignedChunkReader::has_variable_length_value_column() const { + for (const auto* col : value_columns_) { + if (col->chunk_header.data_type_ == common::STRING || + col->chunk_header.data_type_ == common::TEXT || + col->chunk_header.data_type_ == common::BLOB) { + return true; + } + } + return false; +} + +int AlignedChunkReader::count_non_null_prefix( + const std::vector& bitmap, int32_t row_limit) const { + if (row_limit <= 0 || bitmap.empty()) { + return 0; + } + const uint32_t mask_base = 1 << 7; + int count = 0; + for (int32_t i = 0; i < row_limit; i++) { + if (((bitmap[i / 8] & 0xFF) & (mask_base >> (i % 8))) != 0) { + count++; + } + } + return count; +} + +int AlignedChunkReader::decode_time_page_direct( + const ChunkPageInfo& page_info, std::vector& out_times) { + out_times.clear(); + if (page_info.time_compressed_size == 0) { + return E_OK; + } + + char stack_buf[4096]; + char* compressed_buf = stack_buf; + bool heap = page_info.time_compressed_size > sizeof(stack_buf); + if (heap) { + 
compressed_buf = static_cast(common::mem_alloc( + page_info.time_compressed_size, common::MOD_DEFAULT)); + if (compressed_buf == nullptr) { + return E_OOM; + } + } + + int32_t read_len = 0; + int ret = read_file_->read(page_info.time_file_offset, compressed_buf, + page_info.time_compressed_size, read_len); + if (IS_FAIL(ret)) { + if (heap) common::mem_free(compressed_buf); + return ret; + } + + char* uncompressed_buf = nullptr; + uint32_t uncompressed_size = 0; + if (RET_FAIL(time_compressor_->reset(false))) { + if (heap) common::mem_free(compressed_buf); + return ret; + } + ret = time_compressor_->uncompress(compressed_buf, + page_info.time_compressed_size, + uncompressed_buf, uncompressed_size); + if (heap && compressed_buf != uncompressed_buf) { + common::mem_free(compressed_buf); + } + if (IS_FAIL(ret) || uncompressed_size != page_info.time_uncompressed_size) { + if (uncompressed_buf != nullptr) { + time_compressor_->after_uncompress(uncompressed_buf); + } + return E_TSFILE_CORRUPTED; + } + + common::ByteStream in; + in.wrap_from(uncompressed_buf, uncompressed_size); + time_decoder_->reset(); + const int batch_size = 1024; + int64_t batch[batch_size]; + while (time_decoder_->has_remaining(in)) { + int actual = 0; + if (RET_FAIL(time_decoder_->read_batch_int64(batch, batch_size, actual, + in))) { + break; + } + if (actual == 0) { + break; + } + out_times.insert(out_times.end(), batch, batch + actual); + } + time_compressor_->after_uncompress(uncompressed_buf); + return ret; +} + +int AlignedChunkReader::build_page_plan(Filter* filter) { + int ret = E_OK; + chunk_pages_.clear(); + current_page_plan_index_ = 0; + current_page_loaded_ = false; + page_plan_built_ = false; + + const uint32_t num_cols = value_columns_.size(); + while (IS_SUCC(ret)) { + if (time_chunk_visit_offset_ - time_chunk_header_.serialized_size_ >= + time_chunk_header_.data_size_) { + break; + } + + if (RET_FAIL(get_cur_page_header( + time_chunk_meta_, time_in_stream_, cur_time_page_header_, + 
time_chunk_visit_offset_, time_chunk_header_))) { + break; + } + if (cur_time_page_header_.compressed_size_ == 0 && + cur_time_page_header_.uncompressed_size_ == 0) { + break; + } + + ChunkPageInfo page_info; + page_info.time_file_offset = time_chunk_meta_->offset_of_chunk_header_ + + time_chunk_visit_offset_; + page_info.time_compressed_size = cur_time_page_header_.compressed_size_; + page_info.time_uncompressed_size = + cur_time_page_header_.uncompressed_size_; + page_info.value_file_offsets.resize(num_cols); + page_info.value_compressed_sizes.resize(num_cols); + page_info.value_uncompressed_sizes.resize(num_cols); + + for (uint32_t c = 0; c < num_cols && IS_SUCC(ret); c++) { + auto* col = value_columns_[c]; + if (RET_FAIL(get_cur_page_header( + col->chunk_meta, col->in_stream, col->cur_page_header, + col->chunk_visit_offset, col->chunk_header, + &col->file_data_buf_size))) { + break; + } + page_info.value_file_offsets[c] = + col->chunk_meta->offset_of_chunk_header_ + + col->chunk_visit_offset; + page_info.value_compressed_sizes[c] = + col->cur_page_header.compressed_size_; + page_info.value_uncompressed_sizes[c] = + col->cur_page_header.uncompressed_size_; + } + if (IS_FAIL(ret)) { + break; + } + + Statistic* stat = cur_time_page_header_.statistic_; + if (filter == nullptr) { + page_info.pass_type = PagePassType::FULL_PASS; + page_info.row_begin = 0; + page_info.row_end = stat != nullptr ? 
stat->count_ : 0; + } else if (stat != nullptr && !filter->satisfy(stat)) { + page_info.pass_type = PagePassType::SKIP; + } else if (stat != nullptr && filter->contain_start_end_time( + stat->start_time_, stat->end_time_)) { + page_info.pass_type = PagePassType::FULL_PASS; + page_info.row_begin = 0; + page_info.row_end = stat->count_; + } else { + page_info.pass_type = PagePassType::BOUNDARY; + std::vector times; + if (RET_FAIL(decode_time_page_direct(page_info, times))) { + break; + } + int32_t first = -1; + int32_t last = -1; + for (int32_t i = 0; i < static_cast(times.size()); i++) { + if (filter->satisfy_start_end_time(times[i], times[i])) { + if (first < 0) first = i; + last = i; + } + } + if (first >= 0) { + page_info.row_begin = first; + page_info.row_end = last + 1; + } else { + page_info.pass_type = PagePassType::SKIP; + } + } + + if (page_info.pass_type != PagePassType::SKIP) { + if (page_info.row_end == 0) { + std::vector times; + if (RET_FAIL(decode_time_page_direct(page_info, times))) { + break; + } + page_info.row_end = static_cast(times.size()); + } + if (page_info.row_begin < page_info.row_end) { + chunk_pages_.push_back(std::move(page_info)); + } + } + + time_chunk_visit_offset_ += cur_time_page_header_.compressed_size_; + time_in_stream_.wrapped_buf_advance_read_pos( + cur_time_page_header_.compressed_size_); + for (uint32_t c = 0; c < num_cols; c++) { + auto* col = value_columns_[c]; + col->chunk_visit_offset += col->cur_page_header.compressed_size_; + col->in_stream.wrapped_buf_advance_read_pos( + col->cur_page_header.compressed_size_); + } + } + + page_plan_built_ = IS_SUCC(ret); + return ret; +} + +void AlignedChunkReader::release_current_page_state() { + time_predecoded_ = false; + page_all_times_.clear(); + page_time_count_ = 0; + page_time_cursor_ = 0; + for (auto* col : value_columns_) { + if (col->uncompressed_buf != nullptr && col->compressor != nullptr) { + col->compressor->after_uncompress(col->uncompressed_buf); + 
col->uncompressed_buf = nullptr; + } + col->notnull_bitmap.clear(); + col->predecoded_values.clear(); + col->predecoded_strings.clear(); + col->predecoded_count = 0; + col->predecoded_read_pos = 0; + col->predecoded = false; + col->cur_value_index = -1; + col->in.reset(); + col->predecode_pa.destroy(); + } + current_page_loaded_ = false; +} + +int AlignedChunkReader::predecode_value_page_for_plan( + uint32_t col_idx, const ChunkPageInfo& page_info) { + auto* col = value_columns_[col_idx]; + col->notnull_bitmap.clear(); + col->predecoded_values.clear(); + col->predecoded_strings.clear(); + col->predecoded_read_pos = 0; + col->predecoded_count = 0; + col->predecoded = false; + col->predecode_pa.destroy(); + + if (page_info.value_compressed_sizes[col_idx] == 0) { + col->in.wrap_from(nullptr, 0); + return E_OK; + } + + char stack_buf[4096]; + char* compressed_buf = stack_buf; + bool heap = page_info.value_compressed_sizes[col_idx] > sizeof(stack_buf); + if (heap) { + compressed_buf = static_cast(common::mem_alloc( + page_info.value_compressed_sizes[col_idx], common::MOD_DEFAULT)); + if (compressed_buf == nullptr) { + return E_OOM; + } + } + + int32_t read_len = 0; + int ret = + read_file_->read(page_info.value_file_offsets[col_idx], compressed_buf, + page_info.value_compressed_sizes[col_idx], read_len); + if (IS_FAIL(ret)) { + if (heap) common::mem_free(compressed_buf); + return ret; + } + + char* uncompressed_buf = nullptr; + uint32_t uncompressed_size = 0; + if (RET_FAIL(col->compressor->reset(false))) { + if (heap) common::mem_free(compressed_buf); + return ret; + } + ret = col->compressor->uncompress(compressed_buf, + page_info.value_compressed_sizes[col_idx], + uncompressed_buf, uncompressed_size); + if (heap && compressed_buf != uncompressed_buf) { + common::mem_free(compressed_buf); + } + if (IS_FAIL(ret) || + uncompressed_size != page_info.value_uncompressed_sizes[col_idx]) { + if (uncompressed_buf != nullptr) { + 
col->compressor->after_uncompress(uncompressed_buf); + } + return E_TSFILE_CORRUPTED; + } + col->uncompressed_buf = uncompressed_buf; + + uint32_t offset = 0; + uint32_t data_num = SerializationUtil::read_ui32(uncompressed_buf); + offset += sizeof(uint32_t); + col->notnull_bitmap.resize((data_num + 7) / 8); + for (size_t i = 0; i < col->notnull_bitmap.size(); i++) { + col->notnull_bitmap[i] = *(uncompressed_buf + offset++); + } + + char* value_buf = uncompressed_buf + offset; + uint32_t value_buf_size = uncompressed_size - offset; + common::ByteStream in; + in.wrap_from(value_buf, value_buf_size); + col->decoder->reset(); + + auto dt = col->chunk_header.data_type_; + int nonnull_total = count_non_null_prefix(col->notnull_bitmap, + static_cast(data_num)); + int prefix_nonnull = + count_non_null_prefix(col->notnull_bitmap, page_info.row_begin); + col->predecoded_read_pos = prefix_nonnull; + + if (dt == common::STRING || dt == common::TEXT || dt == common::BLOB) { + col->predecode_pa.init(512, common::MOD_TSFILE_READER); + col->predecoded_strings.resize(nonnull_total); + for (int i = 0; i < nonnull_total; i++) { + if (RET_FAIL(col->decoder->read_String(col->predecoded_strings[i], + col->predecode_pa, in))) { + return ret; + } + } + col->predecoded_count = nonnull_total; + col->predecoded = true; + return E_OK; + } + + if (nonnull_total == 0) { + col->predecoded = true; + return E_OK; + } + + uint32_t elem_size = common::get_data_type_size(dt); + col->predecoded_values.resize(static_cast(nonnull_total) * + elem_size); + int actual = 0; + switch (dt) { + case common::BOOLEAN: { + bool* out = reinterpret_cast(col->predecoded_values.data()); + for (int i = 0; i < nonnull_total; i++) { + if (RET_FAIL(col->decoder->read_boolean(out[i], in))) { + return ret; + } + } + actual = nonnull_total; + break; + } + case common::INT32: + case common::DATE: + if (RET_FAIL(col->decoder->read_batch_int32( + reinterpret_cast(col->predecoded_values.data()), + nonnull_total, actual, in))) { 
+ return ret; + } + break; + case common::INT64: + case common::TIMESTAMP: + if (RET_FAIL(col->decoder->read_batch_int64( + reinterpret_cast(col->predecoded_values.data()), + nonnull_total, actual, in))) { + return ret; + } + break; + case common::FLOAT: + if (RET_FAIL(col->decoder->read_batch_float( + reinterpret_cast(col->predecoded_values.data()), + nonnull_total, actual, in))) { + return ret; + } + break; + case common::DOUBLE: + if (RET_FAIL(col->decoder->read_batch_double( + reinterpret_cast(col->predecoded_values.data()), + nonnull_total, actual, in))) { + return ret; + } + break; + default: + return E_NOT_SUPPORT; + } + col->predecoded_count = actual; + col->predecoded = true; + return E_OK; +} + +int AlignedChunkReader::load_current_planned_page() { + if (current_page_plan_index_ >= chunk_pages_.size()) { + return E_NO_MORE_DATA; + } + + release_current_page_state(); + const ChunkPageInfo& page_info = chunk_pages_[current_page_plan_index_]; + int ret = decode_time_page_direct(page_info, page_all_times_); + if (IS_FAIL(ret)) { + return ret; + } + page_time_cursor_ = page_info.row_begin; + page_time_count_ = page_info.row_end; + time_predecoded_ = true; + +#ifdef ENABLE_THREADS + if (decode_pool_ != nullptr && value_columns_.size() > 1) { + std::vector col_rets(value_columns_.size(), E_OK); + for (uint32_t c = 0; c < value_columns_.size(); c++) { + decode_pool_->submit([&, c]() { + col_rets[c] = predecode_value_page_for_plan(c, page_info); + }); + } + decode_pool_->wait_all(); + for (uint32_t c = 0; c < value_columns_.size(); c++) { + if (IS_FAIL(col_rets[c])) { + return col_rets[c]; + } + } + } else +#endif + { + for (uint32_t c = 0; c < value_columns_.size(); c++) { + if (RET_FAIL(predecode_value_page_for_plan(c, page_info))) { + return ret; + } + } + } + + current_page_loaded_ = true; + return E_OK; +} + +int AlignedChunkReader::scatter_current_page(common::TsBlock* ret_tsblock, + RowAppender& row_appender, + common::PageArena* pa) { + const uint32_t 
null_mask_base = 1 << 7; + while (page_time_cursor_ < page_time_count_) { + if (row_appender.remaining() == 0) { + return E_OVERFLOW; + } + + int64_t ts = page_all_times_[page_time_cursor_]; + if (UNLIKELY(!row_appender.add_row())) { + return E_OVERFLOW; + } + row_appender.append(0, reinterpret_cast(&ts), sizeof(ts)); + + for (uint32_t c = 0; c < value_columns_.size(); c++) { + auto* col = value_columns_[c]; + bool is_null = true; + if (!col->notnull_bitmap.empty()) { + is_null = ((col->notnull_bitmap[page_time_cursor_ / 8] & 0xFF) & + (null_mask_base >> (page_time_cursor_ % 8))) == 0; + } + if (is_null) { + row_appender.append_null(c + 1); + continue; + } + + if (col->chunk_header.data_type_ == common::STRING || + col->chunk_header.data_type_ == common::TEXT || + col->chunk_header.data_type_ == common::BLOB) { + const common::String& value = + col->predecoded_strings[col->predecoded_read_pos++]; + row_appender.append(c + 1, value.buf_, value.len_); + } else { + uint32_t elem_size = + common::get_data_type_size(col->chunk_header.data_type_); + row_appender.append( + c + 1, + col->predecoded_values.data() + + static_cast(col->predecoded_read_pos++) * + elem_size, + elem_size); + } + } + page_time_cursor_++; + } + + current_page_plan_index_++; + release_current_page_state(); + return E_OK; +} + +int AlignedChunkReader::get_next_page_multi(TsBlock* ret_tsblock, + Filter* oneshoot_filter, + PageArena& pa) { + int ret = E_OK; + Filter* filter = + (oneshoot_filter != nullptr ? 
oneshoot_filter : time_filter_); + + if (!page_plan_built_) { + if (RET_FAIL(build_page_plan(filter))) { + return ret; + } + } + if (chunk_pages_.empty()) { + return E_NO_MORE_DATA; + } + + while (current_page_plan_index_ < chunk_pages_.size()) { + if (!current_page_loaded_) { + if (RET_FAIL(load_current_planned_page())) { + return ret; + } + } + RowAppender row_appender(ret_tsblock); + ret = scatter_current_page(ret_tsblock, row_appender, &pa); + if (ret == E_OVERFLOW) { + return E_OK; + } + if (IS_FAIL(ret)) { + return ret; + } + } + return E_NO_MORE_DATA; +} + +int AlignedChunkReader::get_next_page_multi_serial(TsBlock* ret_tsblock, + Filter* filter, + PageArena& pa) { + int ret = E_OK; + bool pt = prev_time_page_not_finish(); + bool pv = prev_any_value_page_not_finish_multi(); + if (pt && pv) { + ret = + decode_time_value_buf_into_tsblock_multi(ret_tsblock, filter, &pa); + return ret; + } + if (!pt && !pv) { + while (IS_SUCC(ret)) { + if (RET_FAIL(get_cur_page_header( + time_chunk_meta_, time_in_stream_, cur_time_page_header_, + time_chunk_visit_offset_, time_chunk_header_))) { + break; + } + for (size_t c = 0; c < value_columns_.size() && IS_SUCC(ret); c++) { + auto* col = value_columns_[c]; + if (RET_FAIL(get_cur_page_header( + col->chunk_meta, col->in_stream, col->cur_page_header, + col->chunk_visit_offset, col->chunk_header, + &col->file_data_buf_size))) { + } + } + if (IS_FAIL(ret)) break; + if (cur_page_statisify_filter_multi(filter)) break; + if (RET_FAIL(skip_cur_page_multi())) break; + if (!has_more_data()) { + ret = E_NO_MORE_DATA; + break; + } + } + if (IS_SUCC(ret)) { + ret = decode_cur_time_page_data(); + if (IS_SUCC(ret)) ret = decode_cur_value_pages_multi(); + } + } + if (IS_SUCC(ret)) { + ret = + decode_time_value_buf_into_tsblock_multi(ret_tsblock, filter, &pa); + } + return ret; +} + +bool AlignedChunkReader::cur_page_statisify_filter_multi(Filter* filter) { + bool time_satisfy = filter == nullptr || + cur_time_page_header_.statistic_ == 
nullptr || + filter->satisfy(cur_time_page_header_.statistic_); + return time_satisfy; +} + +int AlignedChunkReader::skip_cur_page_multi() { + time_chunk_visit_offset_ += cur_time_page_header_.compressed_size_; + time_in_stream_.wrapped_buf_advance_read_pos( + cur_time_page_header_.compressed_size_); + for (auto* col : value_columns_) { + col->chunk_visit_offset += col->cur_page_header.compressed_size_; + col->in_stream.wrapped_buf_advance_read_pos( + col->cur_page_header.compressed_size_); + } + return E_OK; +} + +int AlignedChunkReader::decode_cur_value_pages_multi() { + int ret = E_OK; + // Phase 1: Serial IO — ensure each column's page data is in memory. + for (size_t c = 0; c < value_columns_.size() && IS_SUCC(ret); c++) { + ret = ensure_value_page_loaded(*value_columns_[c]); + } + if (IS_FAIL(ret)) return ret; + + // Phase 2: Parallel CPU — decompress + parse bitmap + reset decoder. +#ifdef ENABLE_THREADS + if (value_columns_.size() > 1 && decode_pool_ != nullptr) { + std::vector col_rets(value_columns_.size(), E_OK); + for (size_t c = 0; c < value_columns_.size(); c++) { + auto* col = value_columns_[c]; + int* col_ret = &col_rets[c]; + decode_pool_->submit([col, col_ret] { + *col_ret = decompress_and_parse_value_page(*col); + }); + } + decode_pool_->wait_all(); + for (size_t c = 0; c < col_rets.size(); c++) { + if (IS_FAIL(col_rets[c])) return col_rets[c]; + } + } else +#endif + { + for (size_t c = 0; c < value_columns_.size() && IS_SUCC(ret); c++) { + ret = decompress_and_parse_value_page(*value_columns_[c]); + } + } + return ret; +} + +int AlignedChunkReader::decode_cur_value_page_data_for(ValueColumnState& col) { + int ret = E_OK; + + // Step 1: ensure full page data is loaded + if (col.in_stream.remaining_size() < col.cur_page_header.compressed_size_) { + if (RET_FAIL(read_from_file_and_rewrap( + col.in_stream, col.chunk_meta, col.chunk_visit_offset, + col.file_data_buf_size, + col.cur_page_header.compressed_size_))) { + return ret; + } + } + + if 
(col.cur_page_header.compressed_size_ == 0) { + col.in.wrap_from(nullptr, 0); + return E_OK; + } + + // Step 2: uncompress + char* compressed_buf = + col.in_stream.get_wrapped_buf() + col.in_stream.read_pos(); + uint32_t compressed_size = col.cur_page_header.compressed_size_; + col.in_stream.wrapped_buf_advance_read_pos(compressed_size); + col.chunk_visit_offset += compressed_size; + + char* uncompressed_buf = nullptr; + uint32_t uncompressed_size = 0; + if (RET_FAIL(col.compressor->reset(false))) { + return ret; + } + if (RET_FAIL(col.compressor->uncompress(compressed_buf, compressed_size, + uncompressed_buf, + uncompressed_size))) { + return ret; + } + col.uncompressed_buf = uncompressed_buf; + + if (uncompressed_size != col.cur_page_header.uncompressed_size_) { + return E_TSFILE_CORRUPTED; + } + + // Step 3: parse bitmap + value data + uint32_t offset = 0; + uint32_t data_num = SerializationUtil::read_ui32(uncompressed_buf); + offset += sizeof(uint32_t); + col.notnull_bitmap.resize((data_num + 7) / 8); + for (size_t i = 0; i < col.notnull_bitmap.size(); i++) { + col.notnull_bitmap[i] = *(uncompressed_buf + offset); + offset++; + } + col.cur_value_index = -1; + + char* value_buf = uncompressed_buf + offset; + uint32_t value_buf_size = uncompressed_size - offset; + col.decoder->reset(); + col.in.wrap_from(value_buf, value_buf_size); + return ret; +} + +int AlignedChunkReader::ensure_value_page_loaded(ValueColumnState& col) { + int ret = E_OK; + if (col.in_stream.remaining_size() < col.cur_page_header.compressed_size_) { + if (RET_FAIL(read_from_file_and_rewrap( + col.in_stream, col.chunk_meta, col.chunk_visit_offset, + col.file_data_buf_size, + col.cur_page_header.compressed_size_))) { + return ret; + } + } + return ret; +} + +int AlignedChunkReader::decompress_and_parse_value_page(ValueColumnState& col) { + int ret = E_OK; + + if (col.cur_page_header.compressed_size_ == 0) { + col.in.wrap_from(nullptr, 0); + return E_OK; + } + + // Decompress + char* 
compressed_buf = + col.in_stream.get_wrapped_buf() + col.in_stream.read_pos(); + uint32_t compressed_size = col.cur_page_header.compressed_size_; + col.in_stream.wrapped_buf_advance_read_pos(compressed_size); + col.chunk_visit_offset += compressed_size; + + char* uncompressed_buf = nullptr; + uint32_t uncompressed_size = 0; + if (RET_FAIL(col.compressor->reset(false))) { + return ret; + } + if (RET_FAIL(col.compressor->uncompress(compressed_buf, compressed_size, + uncompressed_buf, + uncompressed_size))) { + return ret; + } + col.uncompressed_buf = uncompressed_buf; + + if (uncompressed_size != col.cur_page_header.uncompressed_size_) { + return E_TSFILE_CORRUPTED; + } + + // Parse bitmap + value data + uint32_t offset = 0; + uint32_t data_num = SerializationUtil::read_ui32(uncompressed_buf); + offset += sizeof(uint32_t); + col.notnull_bitmap.resize((data_num + 7) / 8); + for (size_t i = 0; i < col.notnull_bitmap.size(); i++) { + col.notnull_bitmap[i] = *(uncompressed_buf + offset); + offset++; + } + col.cur_value_index = -1; + + char* value_buf = uncompressed_buf + offset; + uint32_t value_buf_size = uncompressed_size - offset; + col.decoder->reset(); + col.in.wrap_from(value_buf, value_buf_size); + return ret; +} + +int AlignedChunkReader::decode_time_value_buf_into_tsblock_multi( + TsBlock*& ret_tsblock, Filter* filter, PageArena* pa) { + int ret = E_OK; + RowAppender row_appender(ret_tsblock); + ret = multi_DECODE_TV_BATCH(ret_tsblock, row_appender, filter, pa); + + // Release uncompressed buffers if pages are done + if (ret != E_OVERFLOW) { + if (time_uncompressed_buf_ != nullptr) { + time_compressor_->after_uncompress(time_uncompressed_buf_); + time_uncompressed_buf_ = nullptr; + } + for (auto* col : value_columns_) { + if (col->uncompressed_buf != nullptr) { + col->compressor->after_uncompress(col->uncompressed_buf); + col->uncompressed_buf = nullptr; + } + if (!(col->decoder && col->decoder->has_remaining(col->in)) && + !col->in.has_remaining()) { + 
col->in.reset(); + } + col->notnull_bitmap.clear(); + col->notnull_bitmap.shrink_to_fit(); + } + if (!prev_time_page_not_finish()) { + time_in_.reset(); + } + } else { + ret = E_OK; + } + return ret; +} + +int AlignedChunkReader::multi_DECODE_TV_BATCH(TsBlock* ret_tsblock, + RowAppender& row_appender, + Filter* filter, PageArena* pa) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + const uint32_t null_mask_base = 1 << 7; + const uint32_t num_cols = value_columns_.size(); + + while (time_decoder_->has_remaining(time_in_)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // ── Phase 1: Decode a batch of timestamps ── + int time_count = 0; + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in_))) { + break; + } + if (time_count == 0) break; + + // ── Phase 2: Apply time filter ── + bool time_mask[BATCH]; + bool block_all_pass = (filter == nullptr); + int pass_count = time_count; + if (!block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + // ── Phase 3: Per-column null check + value decode ── + // For each column, compute null flags and decode non-null values. + // We store decoded values in column-specific buffers. + // Max 8 bytes per value, 129 values per batch. 
+ struct ColBatch { + bool is_null[BATCH]; + int nonnull_count; + // Value buffer — up to 129 * 8 bytes = 1032 bytes on stack + char val_buf[BATCH * 8]; + int val_count; + }; + // Allocate on heap if many columns, stack for small counts + std::vector col_batches(num_cols); + + for (uint32_t c = 0; c < num_cols; c++) { + auto* col = value_columns_[c]; + auto& cb = col_batches[c]; + cb.nonnull_count = 0; + cb.val_count = 0; + for (int i = 0; i < time_count; i++) { + int vi = col->cur_value_index + 1 + i; + if (col->notnull_bitmap.empty() || + ((col->notnull_bitmap[vi / 8] & 0xFF) & + (null_mask_base >> (vi % 8))) == 0) { + cb.is_null[i] = true; + } else { + cb.is_null[i] = false; + cb.nonnull_count++; + } + } + + // Skip values if no rows pass time filter + if (pass_count == 0 && cb.nonnull_count > 0) { + switch (col->chunk_header.data_type_) { + case common::BOOLEAN: { + // Booleans are 1 byte each; skip by reading and + // discarding + for (int s = 0; s < cb.nonnull_count; s++) { + bool dummy; + col->decoder->read_boolean(dummy, col->in); + } + break; + } + case common::INT32: + case common::DATE: { + int sk = 0; + col->decoder->skip_int32(cb.nonnull_count, sk, col->in); + break; + } + case common::INT64: + case common::TIMESTAMP: { + int sk = 0; + col->decoder->skip_int64(cb.nonnull_count, sk, col->in); + break; + } + case common::FLOAT: { + int sk = 0; + col->decoder->skip_float(cb.nonnull_count, sk, col->in); + break; + } + case common::DOUBLE: { + int sk = 0; + col->decoder->skip_double(cb.nonnull_count, sk, + col->in); + break; + } + default: + // STRING etc - fall through to value decode + break; + } + cb.nonnull_count = 0; // already skipped + } + + // Decode non-null values + if (cb.nonnull_count > 0) { + switch (col->chunk_header.data_type_) { + case common::BOOLEAN: { + bool* out = reinterpret_cast(cb.val_buf); + cb.val_count = 0; + for (int s = 0; s < cb.nonnull_count; s++) { + bool v; + if (col->decoder->read_boolean(v, col->in) != + common::E_OK) + 
break; + out[cb.val_count++] = v; + } + break; + } + case common::INT32: + case common::DATE: + col->decoder->read_batch_int32( + reinterpret_cast(cb.val_buf), + cb.nonnull_count, cb.val_count, col->in); + break; + case common::INT64: + case common::TIMESTAMP: + col->decoder->read_batch_int64( + reinterpret_cast(cb.val_buf), + cb.nonnull_count, cb.val_count, col->in); + break; + case common::FLOAT: + col->decoder->read_batch_float( + reinterpret_cast(cb.val_buf), + cb.nonnull_count, cb.val_count, col->in); + break; + case common::DOUBLE: + col->decoder->read_batch_double( + reinterpret_cast(cb.val_buf), + cb.nonnull_count, cb.val_count, col->in); + break; + default: + // STRING handled below in scatter loop + break; + } + } + } + + // ── Phase 4: Skip if no rows pass ── + if (pass_count == 0) { + for (uint32_t c = 0; c < num_cols; c++) { + value_columns_[c]->cur_value_index += time_count; + } + continue; + } + + // ── Phase 5: Scatter into TsBlock ── + + // Fast path: all rows pass filter AND all columns have no nulls + // → batch memcpy directly into Vector buffers. 
+ if (pass_count == time_count) { + bool all_nonnull = true; + for (uint32_t c = 0; c < num_cols; c++) { + if (col_batches[c].nonnull_count != time_count) { + all_nonnull = false; + break; + } + } + if (all_nonnull) { + // Batch append time column + common::Vector* time_vec = ret_tsblock->get_vector(0); + time_vec->get_value_data().append_fixed_value( + (const char*)times, + static_cast(time_count) * sizeof(int64_t)); + // Batch append each value column + for (uint32_t c = 0; c < num_cols; c++) { + auto& cb = col_batches[c]; + auto* col = value_columns_[c]; + uint32_t elem_size = common::get_data_type_size( + col->chunk_header.data_type_); + common::Vector* vec = ret_tsblock->get_vector(c + 1); + vec->get_value_data().append_fixed_value( + cb.val_buf, + static_cast(cb.val_count) * elem_size); + col->cur_value_index += time_count; + } + row_appender.add_rows(static_cast(time_count)); + continue; + } + } + + // Slow path: per-row scatter (has filter or has nulls) + std::vector val_idx(num_cols, 0); + + for (int i = 0; i < time_count; i++) { + bool passes = block_all_pass || time_mask[i]; + + if (!passes) { + for (uint32_t c = 0; c < num_cols; c++) { + value_columns_[c]->cur_value_index++; + if (!col_batches[c].is_null[i]) val_idx[c]++; + } + continue; + } + + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + + row_appender.append(0, (char*)×[i], sizeof(int64_t)); + + for (uint32_t c = 0; c < num_cols; c++) { + value_columns_[c]->cur_value_index++; + auto& cb = col_batches[c]; + auto* col = value_columns_[c]; + + if (cb.is_null[i]) { + row_appender.append_null(c + 1); + } else { + uint32_t elem_size = common::get_data_type_size( + col->chunk_header.data_type_); + row_appender.append( + c + 1, cb.val_buf + val_idx[c] * elem_size, elem_size); + val_idx[c]++; + } + } + } + if (ret != E_OK) break; } return ret; } -} // end namespace storage \ No newline at end of file +} // end namespace storage diff --git a/cpp/src/reader/aligned_chunk_reader.h 
b/cpp/src/reader/aligned_chunk_reader.h index 91281215e..85e67dfcf 100644 --- a/cpp/src/reader/aligned_chunk_reader.h +++ b/cpp/src/reader/aligned_chunk_reader.h @@ -28,8 +28,57 @@ #include "reader/filter/filter.h" #include "reader/ichunk_reader.h" +#ifdef ENABLE_THREADS +namespace common { +class ThreadPool; +} +#endif + namespace storage { +// Page classification for chunk-level parallel decode. +enum class PagePassType { SKIP, FULL_PASS, BOUNDARY }; + +// Metadata collected per page during the chunk scan phase. +struct ChunkPageInfo { + PagePassType pass_type = PagePassType::SKIP; + // File offsets of compressed data for time and each value column. + int64_t time_file_offset = 0; + uint32_t time_compressed_size = 0; + uint32_t time_uncompressed_size = 0; + int32_t row_begin = 0; // inclusive + int32_t row_end = 0; // exclusive + std::vector value_file_offsets; + std::vector value_compressed_sizes; + std::vector value_uncompressed_sizes; +}; + +// Per-value-column state for multi-value AlignedChunkReader. +struct ValueColumnState { + ChunkMeta* chunk_meta = nullptr; + ChunkHeader chunk_header; + Decoder* decoder = nullptr; + Compressor* compressor = nullptr; + common::ByteStream in_stream; // raw data from file + common::ByteStream in; // decompressed data + char* uncompressed_buf = nullptr; + int32_t file_data_buf_size = 0; + uint32_t chunk_visit_offset = 0; + PageHeader cur_page_header; + std::vector notnull_bitmap; + int32_t cur_value_index = -1; + + // Pre-decoded value buffer for parallel page-level decode. + // Fixed-length types are fully decoded here so the scatter phase + // does a memcpy instead of calling the decoder. 
+ std::vector predecoded_values; + int predecoded_count = 0; // number of non-null values decoded + int predecoded_read_pos = 0; // scatter cursor (advances across E_OVERFLOW) + bool predecoded = false; // true when values are pre-decoded + common::PageArena predecode_pa; + std::vector predecoded_strings; +}; + class AlignedChunkReader : public IChunkReader { public: AlignedChunkReader() @@ -64,11 +113,13 @@ class AlignedChunkReader : public IChunkReader { ~AlignedChunkReader() override = default; bool has_more_data() const override { - return prev_value_page_not_finish() || + if (multi_value_mode_) { + return has_more_data_multi(); + } + return prev_value_page_not_finish() || prev_time_page_not_finish() || (value_chunk_visit_offset_ - value_chunk_header_.serialized_size_ < value_chunk_header_.data_size_) || - prev_time_page_not_finish() || (time_chunk_visit_offset_ - time_chunk_header_.serialized_size_ < time_chunk_header_.data_size_); } @@ -76,13 +127,36 @@ class AlignedChunkReader : public IChunkReader { int load_by_aligned_meta(ChunkMeta* time_meta, ChunkMeta* value_meta) override; + // Multi-value: load one time chunk + N value chunks. + int load_by_aligned_meta_multi(ChunkMeta* time_meta, + const std::vector& value_metas); + int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, common::PageArena& pa) override; - int get_next_page(common::TsBlock* tsblock, Filter* oneshoot_filter, common::PageArena& pa, int64_t min_time_hint, int& row_offset, int& row_limit) override; + // Multi-value: get the number of value columns. + uint32_t get_value_column_count() const { + return multi_value_mode_ ? value_columns_.size() : 1; + } + + // Multi-value: get chunk header for a specific value column. 
+ ChunkHeader& get_value_chunk_header(uint32_t col) { + if (multi_value_mode_ && col < value_columns_.size()) { + return value_columns_[col]->chunk_header; + } + return value_chunk_header_; + } + + bool is_multi_value_mode() const { return multi_value_mode_; } + +#ifdef ENABLE_THREADS + // Set external thread pool for parallel decode (not owned). + void set_decode_pool(common::ThreadPool* pool) { decode_pool_ = pool; } +#endif + private: bool should_skip_page_by_time(int64_t min_time_hint); bool should_skip_page_by_offset(int& row_offset); @@ -100,7 +174,8 @@ class AlignedChunkReader : public IChunkReader { common::ByteStream& in_stream_, PageHeader& cur_page_header_, uint32_t& chunk_visit_offset, - ChunkHeader& chunk_header); + ChunkHeader& chunk_header, + int32_t* override_buf_size = nullptr); int read_from_file_and_rewrap(common::ByteStream& in_stream_, ChunkMeta*& chunk_meta, uint32_t& chunk_visit_offset, @@ -114,6 +189,7 @@ class AlignedChunkReader : public IChunkReader { Filter* filter, common::PageArena* pa); bool prev_time_page_not_finish() const { + if (time_predecoded_) return page_time_cursor_ < page_time_count_; return (time_decoder_ && time_decoder_->has_remaining(time_in_)) || time_in_.has_remaining(); } @@ -132,58 +208,112 @@ class AlignedChunkReader : public IChunkReader { common::ByteStream& value_in, common::RowAppender& row_appender, Filter* filter); + int i32_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, Filter* filter); + int i64_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, Filter* filter); + int float_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, + Filter* filter); + int double_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, + Filter* filter); int 
STRING_DECODE_TYPED_TV_INTO_TSBLOCK(common::ByteStream& time_in, common::ByteStream& value_in, common::RowAppender& row_appender, common::PageArena& pa, Filter* filter); + // ── Multi-value private methods (page-level, serial fallback) ──────── + bool has_more_data_multi() const; + bool prev_any_value_page_not_finish_multi() const; + int get_next_page_multi(common::TsBlock* ret_tsblock, + Filter* oneshoot_filter, common::PageArena& pa); + int get_next_page_multi_serial(common::TsBlock* ret_tsblock, Filter* filter, + common::PageArena& pa); + int skip_cur_page_multi(); + bool cur_page_statisify_filter_multi(Filter* filter); + int decode_cur_value_pages_multi(); + int decode_cur_value_page_data_for(ValueColumnState& col); + int ensure_value_page_loaded(ValueColumnState& col); + static int decompress_and_parse_value_page(ValueColumnState& col); + void predecode_all_timestamps(); + int decode_time_value_buf_into_tsblock_multi(common::TsBlock*& ret_tsblock, + Filter* filter, + common::PageArena* pa); + int multi_DECODE_TV_BATCH(common::TsBlock* ret_tsblock, + common::RowAppender& row_appender, Filter* filter, + common::PageArena* pa); + int build_page_plan(Filter* filter); + int decode_time_page_direct(const ChunkPageInfo& page_info, + std::vector& out_times); + int load_current_planned_page(); + int predecode_value_page_for_plan(uint32_t col_idx, + const ChunkPageInfo& page_info); + int scatter_current_page(common::TsBlock* ret_tsblock, + common::RowAppender& row_appender, + common::PageArena* pa); + void release_current_page_state(); + bool has_variable_length_value_column() const; + int count_non_null_prefix(const std::vector& bitmap, + int32_t row_limit) const; + private: ReadFile* read_file_; + // ── Single-value mode fields (kept for backward compat) ────────────── ChunkMeta* time_chunk_meta_; ChunkMeta* value_chunk_meta_; common::String measurement_name_; ChunkHeader time_chunk_header_; - // TODO: support reading more than one measurement in AlignedChunkReader. 
ChunkHeader value_chunk_header_; PageHeader cur_time_page_header_; PageHeader cur_value_page_header_; - /* - * Data reader from file is stored in @in_stream_, and the size - * is stored in @file_data_buf_size_. Note, in_stream_.total_size_ - * is used to limit deserialization, that is why we still have - * @file_data_buf_size_. - * - * Since we may want keep data of current page (and page header - * of next page) in memory, we need a byte-size cursor to tell - * us which byte we are processing, so we have @chunk_visit_offset_ - * it refer to position from the start of chunk_header_, - * also refer to offset within the chunk (including chunk header). - * It advanced by step of a page header or a page tv data. - */ - common::ByteStream time_in_stream_{common::MOD_CHUNK_READER}; - common::ByteStream value_in_stream_{common::MOD_CHUNK_READER}; + common::ByteStream time_in_stream_; + common::ByteStream value_in_stream_; int32_t file_data_time_buf_size_; int32_t file_data_value_buf_size_; uint32_t time_chunk_visit_offset_; uint32_t value_chunk_visit_offset_; - // Statistic *page_statistic_; Compressor* time_compressor_; Compressor* value_compressor_; Filter* time_filter_; Decoder* time_decoder_; Decoder* value_decoder_; - common::ByteStream time_in_{common::MOD_CHUNK_READER}; - common::ByteStream value_in_{common::MOD_CHUNK_READER}; + common::ByteStream time_in_; + common::ByteStream value_in_; char* time_uncompressed_buf_; char* value_uncompressed_buf_; std::vector value_page_col_notnull_bitmap_; uint32_t value_page_data_num_; int32_t cur_value_index; + + // ── Multi-value mode fields ────────────────────────────────────────── + bool multi_value_mode_ = false; + std::vector value_columns_; + + // Pre-decoded timestamps for page-level parallel decode. 
+ std::vector page_all_times_; + int page_time_count_ = 0; + int page_time_cursor_ = 0; + bool time_predecoded_ = false; + + // ── Page-plan state ──────────────────────────────────────────────── + std::vector chunk_pages_; + bool page_plan_built_ = false; + bool current_page_loaded_ = false; + size_t current_page_plan_index_ = 0; + +#ifdef ENABLE_THREADS + common::ThreadPool* decode_pool_ = nullptr; // borrowed, not owned +#endif }; } // end namespace storage -#endif // READER_CHUNK_READER_H +#endif // READER_CHUNK_ALIGNED_READER_H diff --git a/cpp/src/reader/block/single_device_tsblock_reader.cc b/cpp/src/reader/block/single_device_tsblock_reader.cc index 93f42efd3..8c0ab9a77 100644 --- a/cpp/src/reader/block/single_device_tsblock_reader.cc +++ b/cpp/src/reader/block/single_device_tsblock_reader.cc @@ -19,6 +19,12 @@ #include "single_device_tsblock_reader.h" +#include +#include +#include + +#include "common/db_common.h" + namespace storage { SingleDeviceTsBlockReader::SingleDeviceTsBlockReader( @@ -63,7 +69,7 @@ int32_t SingleDeviceTsBlockReader::compute_dense_row_count( int64_t time_count = 0; int64_t value_count = 0; - if (ts_index->is_aligned()) { + if (ts_index->get_data_type() == common::VECTOR) { auto* time_list = ts_index->get_time_chunk_meta_list(); auto* value_list = ts_index->get_value_chunk_meta_list(); if (time_list == nullptr || value_list == nullptr) { @@ -149,32 +155,130 @@ int SingleDeviceTsBlockReader::init_internal(DeviceQueryTask* device_query_task, time_series_indexs, pa_))) { return ret; } - dense_row_count_ = compute_dense_row_count(time_series_indexs); - - if (dense_row_count_ >= 0 && remaining_offset_ >= dense_row_count_) { - remaining_offset_ -= dense_row_count_; - delete current_block_; - current_block_ = nullptr; - return common::E_OK; + // Table queries allow sparse aligned fields. Until dense-ness can be + // proven robustly for a device, fall back to the per-column merge path. 
+ const bool enable_dense_aligned_fast_path = false; + // Early device-level time skip: if time_filter is set and ALL chunks of + // this device have statistics that fall outside the filter range, skip the + // entire device. Chunks without statistics are assumed to satisfy. + if (time_filter != nullptr) { + bool all_outside = true; + for (const auto* ts_idx : time_series_indexs) { + if (ts_idx == nullptr) continue; + auto* chunk_list = (ts_idx->get_data_type() == common::VECTOR) + ? ts_idx->get_time_chunk_meta_list() + : ts_idx->get_chunk_meta_list(); + if (chunk_list == nullptr) { + all_outside = false; + break; + } + for (auto it = chunk_list->begin(); it != chunk_list->end(); it++) { + if (it.get()->statistic_ == nullptr || + time_filter->satisfy(it.get()->statistic_)) { + all_outside = false; + break; + } + } + if (!all_outside) break; + } + if (all_outside) { + // No data in this device matches the time filter. + delete current_block_; + current_block_ = nullptr; + return common::E_OK; + } } + // Try multi-value aligned path: one SSI reads all aligned value columns + // at once, even for a single column. This is valid for sparse aligned + // fields; the merge layer must simply avoid visiting the shared context + // more than once. 
+ bool used_multi = false; + std::set multi_names; + { + bool can_multi = true; + auto& meas_cols = + device_query_task->get_column_mapping()->get_measurement_columns(); + for (const auto& ts_idx : time_series_indexs) { + if (ts_idx == nullptr || + ts_idx->get_data_type() != common::VECTOR) { + can_multi = false; + break; + } + } + if (can_multi) { + std::vector meas_names(meas_cols.begin(), + meas_cols.end()); + std::sort( + meas_names.begin(), meas_names.end(), + [device_query_task](const std::string& lhs, + const std::string& rhs) { + const auto& lhs_pos = + device_query_task->get_column_mapping()->get_column_pos( + lhs); + const auto& rhs_pos = + device_query_task->get_column_mapping()->get_column_pos( + rhs); + const int lhs_first = + lhs_pos.empty() ? INT32_MAX : lhs_pos.front(); + const int rhs_first = + rhs_pos.empty() ? INT32_MAX : rhs_pos.front(); + if (lhs_first != rhs_first) { + return lhs_first < rhs_first; + } + return lhs < rhs; + }); + std::vector> pos_list; + pos_list.reserve(meas_names.size()); + for (const auto& name : meas_names) { + const auto& pos = + device_query_task->get_column_mapping()->get_column_pos( + name); + pos_list.push_back( + std::vector(pos.begin(), pos.end())); + } - int ssi_offset = 0; - int ssi_limit = -1; - if (dense_row_count_ >= 0) { - ssi_offset = remaining_offset_; - ssi_limit = remaining_limit_; + auto* ctx = new VectorMeasurementColumnContext(tsfile_io_reader_); + if (common::E_OK == ctx->init(device_query_task_, meas_names, + time_filter, pos_list, pa_)) { + for (const auto& name : meas_names) { + field_column_contexts_.insert(std::make_pair(name, ctx)); + multi_names.insert(name); + } + aligned_col_count_ = meas_names.size(); + used_multi = true; + } else { + delete ctx; + } + } } for (const auto& time_series_index : time_series_indexs) { - construct_column_context(time_series_index, time_filter, ssi_offset, - ssi_limit); + if (time_series_index == nullptr) { + continue; + } + const std::string measurement_name = + 
time_series_index->get_measurement_name().to_std_string(); + if (used_multi && multi_names.count(measurement_name) > 0) { + continue; + } + construct_column_context(time_series_index, time_filter, 0, -1); } - if (dense_row_count_ >= 0 && !field_column_contexts_.empty()) { - auto* first_ctx = field_column_contexts_.begin()->second; - remaining_offset_ = first_ctx->get_ssi_row_offset(); - remaining_limit_ = first_ctx->get_ssi_row_limit(); + // Detect aligned fast path: every field column comes from an aligned chunk. + if (!field_column_contexts_.empty() && enable_dense_aligned_fast_path && + dense_row_count_ >= 0 && + aligned_col_count_ == field_column_contexts_.size()) { + all_aligned_ = true; + aligned_vec_.reserve(field_column_contexts_.size()); + if (used_multi) { + // Single VectorMeasurementColumnContext handles all columns. + aligned_vec_.push_back(field_column_contexts_.begin()->second); + } else { + for (auto& kv : field_column_contexts_) { + aligned_vec_.push_back(kv.second); + } + } } if (field_column_contexts_.empty()) { @@ -218,18 +322,25 @@ int SingleDeviceTsBlockReader::has_next(bool& has_next) { current_block_->reset(); - uint32_t effective_block_size = block_size_; - if (remaining_limit_ > 0) { - effective_block_size = - std::min(block_size_, static_cast(remaining_limit_)); + if (all_aligned_) { + return has_next_aligned(has_next); } bool next_time_set = false; next_time_ = -1; std::vector min_time_columns; - while (current_block_->get_row_count() < effective_block_size) { + while (current_block_->get_row_count() < block_size_) { + if (remaining_limit_ > 0 && + current_block_->get_row_count() >= + static_cast(remaining_limit_)) { + break; + } + std::set visited_contexts; for (auto& column_context : field_column_contexts_) { + if (!visited_contexts.insert(column_context.second).second) { + continue; + } int64_t time; if (IS_FAIL(column_context.second->get_current_time(time))) { continue; @@ -293,6 +404,101 @@ int 
SingleDeviceTsBlockReader::has_next(bool& has_next) { return ret; } +int SingleDeviceTsBlockReader::has_next_aligned(bool& result_has_next) { + int ret = common::E_OK; + int time_in_query_index = tuple_desc_.get_time_column_index(); + + while (current_block_->get_row_count() < block_size_) { + if (aligned_vec_.empty()) break; + + if (remaining_limit_ == 0) break; + + // Check if first column has data. + uint32_t avail = aligned_vec_[0]->available_rows(); + if (avail == 0) { + for (auto* ctx : aligned_vec_) { + ctx->remove_from(field_column_contexts_); + } + aligned_vec_.clear(); + break; + } + + // Find the batch size: min of output capacity and all SSI + // availabilities. + uint32_t batch = block_size_ - current_block_->get_row_count(); + for (auto* ctx : aligned_vec_) { + uint32_t ctx_avail = ctx->available_rows(); + if (ctx_avail == 0) { + batch = 0; + break; + } + if (ctx_avail < batch) batch = ctx_avail; + } + if (batch == 0) { + for (auto* ctx : aligned_vec_) { + ctx->remove_from(field_column_contexts_); + } + aligned_vec_.clear(); + break; + } + + // Handle offset: skip rows before copying. + if (remaining_offset_ > 0) { + uint32_t skip = std::min(batch, (uint32_t)remaining_offset_); + for (auto* ctx : aligned_vec_) { + ctx->skip_rows(skip); + } + remaining_offset_ -= skip; + continue; + } + + // Handle limit: cap the batch size. + if (remaining_limit_ > 0) { + batch = std::min(batch, (uint32_t)remaining_limit_); + } + + // First SSI: bulk copy time + values + row_count. + int copy_ret = aligned_vec_[0]->bulk_copy_into( + col_appenders_, col_appenders_[time_column_index_], row_appender_, + batch); + + // Also copy time to explicit time column if requested. 
+ if (time_in_query_index != -1) { + common::Vector* time_vec = + current_block_->get_vector(time_column_index_); + char* time_src = + time_vec->get_value_data().get_data() + + (current_block_->get_row_count() - batch) * sizeof(int64_t); + col_appenders_[time_in_query_index]->bulk_append_fixed( + time_src, batch, sizeof(int64_t)); + } + + // Other SSIs: bulk copy values only (no time, no row_count). + for (size_t i = 1; i < aligned_vec_.size(); i++) { + aligned_vec_[i]->bulk_copy_into(col_appenders_, nullptr, nullptr, + batch); + } + + // Decrement limit for data already copied. + if (remaining_limit_ > 0) { + remaining_limit_ -= batch; + } + + // If first SSI signaled no-more-data, stop after accounting. + if (copy_ret != common::E_OK) break; + } + + if (current_block_->get_row_count() > 0) { + if (RET_FAIL(fill_ids())) return ret; + current_block_->fill_trailling_nulls(); + last_block_returned_ = false; + result_has_next = true; + } else { + result_has_next = false; + } + return ret; +} + int SingleDeviceTsBlockReader::fill_measurements( std::vector& column_contexts) { int ret = common::E_OK; @@ -400,8 +606,15 @@ int SingleDeviceTsBlockReader::next(common::TsBlock*& ret_block) { } void SingleDeviceTsBlockReader::close() { + aligned_vec_.clear(); // non-owning; owned by field_column_contexts_ + // De-duplicate pointers before deleting: VectorMeasurementColumnContext + // has multiple map entries pointing to the same object. 
+ std::set unique_contexts; for (auto& column_context : field_column_contexts_) { - delete column_context.second; + unique_contexts.insert(column_context.second); + } + for (auto* ctx : unique_contexts) { + delete ctx; } for (auto& col_appender : col_appenders_) { if (col_appender) { @@ -413,9 +626,7 @@ void SingleDeviceTsBlockReader::close() { delete row_appender_; row_appender_ = nullptr; } - if (device_query_task_) { - device_query_task_->~DeviceQueryTask(); - } + device_query_task_ = nullptr; // owned by the task iterator arena if (current_block_) { delete current_block_; current_block_ = nullptr; @@ -427,9 +638,11 @@ int SingleDeviceTsBlockReader::construct_column_context( int ssi_offset, int ssi_limit) { int ret = common::E_OK; if (time_series_index == nullptr || - (!time_series_index->is_aligned() && + (time_series_index->get_data_type() != common::TSDataType::VECTOR && time_series_index->get_chunk_meta_list()->empty())) { - } else if (time_series_index->is_aligned()) { + } else if (time_series_index->get_data_type() == common::VECTOR) { + const int effective_ssi_offset = dense_row_count_ >= 0 ? ssi_offset : 0; + const int effective_ssi_limit = dense_row_count_ >= 0 ? 
ssi_limit : -1; const AlignedTimeseriesIndex* aligned_time_series_index = dynamic_cast(time_series_index); if (aligned_time_series_index == nullptr) { @@ -441,13 +654,14 @@ int SingleDeviceTsBlockReader::construct_column_context( device_query_task_, time_series_index, time_filter, device_query_task_->get_column_mapping()->get_column_pos( time_series_index->get_measurement_name().to_std_string()), - pa_, ssi_offset, ssi_limit))) { + pa_, effective_ssi_offset, effective_ssi_limit))) { delete column_context; return ret; } field_column_contexts_.insert(std::make_pair( time_series_index->get_measurement_name().to_std_string(), column_context)); + aligned_col_count_++; } else { SingleMeasurementColumnContext* column_context = new SingleMeasurementColumnContext(tsfile_io_reader_); @@ -568,4 +782,323 @@ void SingleMeasurementColumnContext::fill_into( } } +uint32_t SingleMeasurementColumnContext::available_rows() const { + if (!time_iter_ || time_iter_->end()) return 0; + return time_iter_->remaining(); +} + +int SingleMeasurementColumnContext::bulk_copy_into( + std::vector& col_appenders, + common::ColAppender* time_appender, common::RowAppender* row_appender, + uint32_t count) { + int ret = common::E_OK; + const uint32_t time_elem_size = sizeof(int64_t); + auto dt = value_iter_->get_data_type(); + bool is_varlen = + (dt == common::STRING || dt == common::TEXT || dt == common::BLOB); + + // Bulk copy time column (only first SSI does this). + if (time_appender) { + time_appender->bulk_append_fixed(time_iter_->data_ptr(), count, + time_elem_size); + } + + // Advance output row count (only first SSI does this). 
+ if (row_appender) { + row_appender->add_rows(count); + } + + if (is_varlen || value_iter_->has_null()) { + for (uint32_t r = 0; r < count; r++) { + uint32_t len = 0; + bool is_null = false; + char* val = value_iter_->read(&len, &is_null); + for (int32_t pos : pos_in_result_) { + auto* appender = col_appenders[pos + 1]; + appender->add_row(); + if (is_null) { + appender->append_null(); + } else { + appender->append(val, len); + } + } + value_iter_->next(); + } + } else { + const uint32_t val_elem_size = common::get_data_type_size(dt); + char* val_ptr = value_iter_->data_ptr(); + for (int32_t pos : pos_in_result_) { + col_appenders[pos + 1]->bulk_append_fixed(val_ptr, count, + val_elem_size); + } + value_iter_->advance(count, val_elem_size); + } + + // Advance source iterators. + time_iter_->advance(count, time_elem_size); + + // If source TsBlock exhausted, load next. + if (time_iter_->end()) { + if (RET_FAIL(get_next_tsblock(false))) { + return ret; + } + } + return ret; +} + +void SingleMeasurementColumnContext::skip_rows(uint32_t count) { + if (!time_iter_ || time_iter_->end()) return; + const uint32_t time_elem_size = sizeof(int64_t); + auto dt = value_iter_->get_data_type(); + bool is_varlen = + (dt == common::STRING || dt == common::TEXT || dt == common::BLOB); + uint32_t to_skip = std::min(count, time_iter_->remaining()); + time_iter_->advance(to_skip, time_elem_size); + if (is_varlen || value_iter_->has_null()) { + for (uint32_t r = 0; r < to_skip; r++) { + value_iter_->next(); + } + } else { + const uint32_t val_elem_size = common::get_data_type_size(dt); + value_iter_->advance(to_skip, val_elem_size); + } + if (time_iter_->end()) { + get_next_tsblock(false); + } +} + +// ── VectorMeasurementColumnContext implementation ─────────────────────── + +VectorMeasurementColumnContext::~VectorMeasurementColumnContext() { + if (time_iter_) { + delete time_iter_; + time_iter_ = nullptr; + } + for (auto* vi : value_iters_) { + if (vi) delete vi; + } + 
value_iters_.clear(); + if (ssi_) { + ssi_->revert_tsblock(); + } + tsfile_io_reader_->revert_ssi(ssi_); + ssi_ = nullptr; +} + +int VectorMeasurementColumnContext::init( + DeviceQueryTask* device_query_task, + const std::vector& measurement_names, Filter* time_filter, + std::vector>& pos_in_result, common::PageArena& pa) { + int ret = common::E_OK; + pos_in_result_ = pos_in_result; + column_names_ = measurement_names; + if (RET_FAIL(tsfile_io_reader_->alloc_multi_ssi( + device_query_task->get_device_id(), measurement_names, ssi_, pa, + time_filter))) { + return ret; + } + if (RET_FAIL(get_next_tsblock(true))) { + return ret; + } + return ret; +} + +int VectorMeasurementColumnContext::get_next_tsblock(bool alloc_mem) { + int ret = common::E_OK; + if (tsblock_ != nullptr) { + if (time_iter_) { + delete time_iter_; + time_iter_ = nullptr; + } + for (auto* vi : value_iters_) { + if (vi) delete vi; + } + value_iters_.clear(); + tsblock_->reset(); + } + if (RET_FAIL(ssi_->get_next(tsblock_, alloc_mem))) { + if (time_iter_) { + delete time_iter_; + time_iter_ = nullptr; + } + for (auto* vi : value_iters_) { + if (vi) delete vi; + } + value_iters_.clear(); + if (tsblock_) { + ssi_->destroy(); + tsblock_ = nullptr; + } + } else { + time_iter_ = new common::ColIterator(0, tsblock_); + uint32_t num_value_cols = tsblock_->get_column_count() - 1; + value_iters_.reserve(num_value_cols); + for (uint32_t c = 0; c < num_value_cols; c++) { + value_iters_.push_back(new common::ColIterator(c + 1, tsblock_)); + } + } + return ret; +} + +int VectorMeasurementColumnContext::get_current_time(int64_t& time) { + if (!time_iter_ || time_iter_->end()) return common::E_NO_MORE_DATA; + uint32_t len = 0; + time = *(int64_t*)(time_iter_->read(&len)); + return common::E_OK; +} + +int VectorMeasurementColumnContext::get_current_value(char*& value, + uint32_t& len) { + if (value_iters_.empty() || value_iters_[0]->end()) + return common::E_NO_MORE_DATA; + bool is_null = false; + value = 
value_iters_[0]->read(&len, &is_null); + return common::E_OK; +} + +int VectorMeasurementColumnContext::move_iter() { + int ret = common::E_OK; + time_iter_->next(); + for (auto* vi : value_iters_) vi->next(); + if (time_iter_->end()) { + if (RET_FAIL(get_next_tsblock(false))) return ret; + } + return ret; +} + +void VectorMeasurementColumnContext::fill_into( + std::vector& col_appenders) { + for (uint32_t c = 0; c < value_iters_.size() && c < pos_in_result_.size(); + c++) { + uint32_t len = 0; + bool is_null = false; + char* val = value_iters_[c]->read(&len, &is_null); + for (int32_t pos : pos_in_result_[c]) { + col_appenders[pos + 1]->add_row(); + if (is_null) { + col_appenders[pos + 1]->append_null(); + } else { + col_appenders[pos + 1]->append(val, len); + } + } + } +} + +void VectorMeasurementColumnContext::remove_from( + std::map& column_context_map) { + for (const auto& name : column_names_) { + column_context_map.erase(name); + } + delete this; +} + +uint32_t VectorMeasurementColumnContext::available_rows() const { + if (!time_iter_ || time_iter_->end()) return 0; + return time_iter_->remaining(); +} + +int VectorMeasurementColumnContext::bulk_copy_into( + std::vector& col_appenders, + common::ColAppender* time_appender, common::RowAppender* row_appender, + uint32_t count) { + int ret = common::E_OK; + const uint32_t time_elem_size = sizeof(int64_t); + + // Bulk copy time column (only when time_appender is provided). + if (time_appender) { + time_appender->bulk_append_fixed(time_iter_->data_ptr(), count, + time_elem_size); + } + + // Advance output row count. + if (row_appender) { + row_appender->add_rows(count); + } + + // Bulk copy each value column to its output positions, propagating nulls. 
+ for (uint32_t c = 0; c < value_iters_.size() && c < pos_in_result_.size(); + c++) { + auto dt = value_iters_[c]->get_data_type(); + bool is_varlen = + (dt == common::STRING || dt == common::TEXT || dt == common::BLOB); + bool src_has_null = value_iters_[c]->has_null(); + + if (is_varlen || src_has_null) { + // Row-by-row copy for variable-length columns using the + // ColIterator next()/read() which properly tracks offsets. Fixed + // length columns with nulls also need this path because their + // payload buffer only stores non-null values. + auto* iter = value_iters_[c]; + for (uint32_t r = 0; r < count; r++) { + uint32_t len = 0; + bool is_null = false; + char* val = iter->read(&len, &is_null); + for (int32_t pos : pos_in_result_[c]) { + auto* appender = col_appenders[pos + 1]; + appender->add_row(); + if (is_null) { + appender->append_null(); + } else { + appender->append(val, len); + } + } + iter->next(); + } + } else { + // Bulk copy for fixed-length columns + uint32_t val_elem_size = common::get_data_type_size(dt); + char* val_ptr = value_iters_[c]->data_ptr(); + for (int32_t pos : pos_in_result_[c]) { + col_appenders[pos + 1]->bulk_append_fixed(val_ptr, count, + val_elem_size); + } + } + } + + // Advance all source iterators. + time_iter_->advance(count, time_elem_size); + for (uint32_t c = 0; c < value_iters_.size(); c++) { + auto dt = value_iters_[c]->get_data_type(); + bool is_varlen = + (dt == common::STRING || dt == common::TEXT || dt == common::BLOB); + if (!is_varlen && !value_iters_[c]->has_null()) { + uint32_t val_elem_size = common::get_data_type_size(dt); + value_iters_[c]->advance(count, val_elem_size); + } + // Variable-length iterators and fixed-length iterators with nulls were + // already advanced in the copy loop above. + } + + // If source TsBlock exhausted, load next. 
+ if (time_iter_->end()) { + if (RET_FAIL(get_next_tsblock(false))) return ret; + } + return ret; +} + +void VectorMeasurementColumnContext::skip_rows(uint32_t count) { + if (!time_iter_ || time_iter_->end()) return; + const uint32_t time_elem_size = sizeof(int64_t); + uint32_t to_skip = std::min(count, time_iter_->remaining()); + time_iter_->advance(to_skip, time_elem_size); + for (uint32_t c = 0; c < value_iters_.size(); c++) { + auto dt = value_iters_[c]->get_data_type(); + bool is_varlen = + (dt == common::STRING || dt == common::TEXT || dt == common::BLOB); + if (!is_varlen && !value_iters_[c]->has_null()) { + uint32_t val_elem_size = common::get_data_type_size(dt); + value_iters_[c]->advance(to_skip, val_elem_size); + } else { + // Variable-length and fixed-length-with-null vectors need next() + // to keep the payload offset aligned with non-null rows. + for (uint32_t r = 0; r < to_skip; r++) { + value_iters_[c]->next(); + } + } + } + if (time_iter_->end()) { + get_next_tsblock(false); + } +} + } // namespace storage diff --git a/cpp/src/reader/block/single_device_tsblock_reader.h b/cpp/src/reader/block/single_device_tsblock_reader.h index 07d16860c..9a9210667 100644 --- a/cpp/src/reader/block/single_device_tsblock_reader.h +++ b/cpp/src/reader/block/single_device_tsblock_reader.h @@ -65,6 +65,9 @@ class SingleDeviceTsBlockReader : public TsBlockReader { int advance_column(MeasurementColumnContext* column_context); int32_t compute_dense_row_count( const std::vector& ts_indexes); + // Fast path for aligned data: all columns share the same timestamps, + // so no per-row merge-sort is needed. + int has_next_aligned(bool& has_next); DeviceQueryTask* device_query_task_; Filter* field_filter_; @@ -83,6 +86,11 @@ class SingleDeviceTsBlockReader : public TsBlockReader { int remaining_offset_ = 0; int remaining_limit_ = -1; int32_t dense_row_count_ = -1; + // Populated in init() when every field column comes from an aligned chunk. 
+ // Provides cache-friendly vector iteration for has_next_aligned(). + bool all_aligned_ = false; + uint32_t aligned_col_count_ = 0; + std::vector aligned_vec_; }; class MeasurementColumnContext { @@ -116,6 +124,13 @@ class MeasurementColumnContext { return ssi_ ? ssi_->get_row_limit() : -1; } + virtual uint32_t available_rows() const = 0; + virtual int bulk_copy_into(std::vector& col_appenders, + common::ColAppender* time_appender, + common::RowAppender* row_appender, + uint32_t count) = 0; + virtual void skip_rows(uint32_t count) = 0; + protected: TsFileIOReader* tsfile_io_reader_; TsFileSeriesScanIterator* ssi_ = nullptr; @@ -155,6 +170,12 @@ class SingleMeasurementColumnContext final : public MeasurementColumnContext { int get_current_time(int64_t& time) override; int get_current_value(char*& value, uint32_t& len) override; int move_iter() override; + uint32_t available_rows() const override; + int bulk_copy_into(std::vector& col_appenders, + common::ColAppender* time_appender, + common::RowAppender* row_appender, + uint32_t count) override; + void skip_rows(uint32_t count) override; private: std::string column_name_; @@ -165,21 +186,31 @@ class VectorMeasurementColumnContext final : public MeasurementColumnContext { public: explicit VectorMeasurementColumnContext(TsFileIOReader* tsfile_io_reader) : MeasurementColumnContext(tsfile_io_reader) {} + ~VectorMeasurementColumnContext() override; void fill_into(std::vector& col_appenders) override; void remove_from(std::map& column_context_map) override; int init(DeviceQueryTask* device_query_task, - const ITimeseriesIndex* time_series_index, Filter* time_filter, + const std::vector& measurement_names, + Filter* time_filter, std::vector>& pos_in_result, common::PageArena& pa); int get_next_tsblock(bool alloc_mem) override; int get_current_time(int64_t& time) override; int get_current_value(char*& value, uint32_t& len) override; int move_iter() override; + uint32_t available_rows() const override; + int 
bulk_copy_into(std::vector& col_appenders, + common::ColAppender* time_appender, + common::RowAppender* row_appender, + uint32_t count) override; + void skip_rows(uint32_t count) override; private: + std::vector column_names_; std::vector> pos_in_result_; + std::vector value_iters_; }; class IdColumnContext { diff --git a/cpp/src/reader/bloom_filter.cc b/cpp/src/reader/bloom_filter.cc index 068c96e27..4aff4ecd3 100644 --- a/cpp/src/reader/bloom_filter.cc +++ b/cpp/src/reader/bloom_filter.cc @@ -208,6 +208,26 @@ int BloomFilter::add_path_entry(const String& device_name, return E_OK; } +bool BloomFilter::contains(const String& device_name, + const String& measurement_name) { + if (size_ == 0) { + return true; // empty filter — assume present + } + String entry = get_entry_string(device_name, measurement_name); + if (IS_NULL(entry.buf_)) { + return true; // OOM — conservatively assume present + } + for (uint32_t i = 0; i < hash_func_count_; i++) { + int32_t hv = hash_func_arr_[i].hash(entry); + if (!bitset_.get(hv)) { + free_entry_buf(entry.buf_); + return false; // definitely not present + } + } + free_entry_buf(entry.buf_); + return true; // probably present +} + int BloomFilter::serialize_to(ByteStream& out) { int ret = E_OK; uint8_t* filter_data_bytes = nullptr; diff --git a/cpp/src/reader/bloom_filter.h b/cpp/src/reader/bloom_filter.h index b00de4a84..323cfa8a4 100644 --- a/cpp/src/reader/bloom_filter.h +++ b/cpp/src/reader/bloom_filter.h @@ -74,6 +74,11 @@ class BitSet { int32_t word_offset = pos % 64; words_[word_idx] |= (1ull << word_offset); } + bool get(int32_t pos) const { + int32_t word_idx = pos / 64; + int32_t word_offset = pos % 64; + return (words_[word_idx] & (1ull << word_offset)) != 0; + } int32_t get_words_in_use() const { for (int32_t i = word_count_ - 1; i >= 0; i--) { if (words_[i] != 0) { @@ -107,8 +112,11 @@ class BloomFilter { void destroy() { bitset_.destroy(); } int add_path_entry(const common::String& device_name, const common::String& 
measurement_name); + bool contains(const common::String& device_name, + const common::String& measurement_name); int serialize_to(common::ByteStream& out); int deserialize_from(common::ByteStream& in); + bool is_empty() const { return size_ == 0; } BitSet* get_bit_set() { return &bitset_; } private: diff --git a/cpp/src/reader/chunk_reader.cc b/cpp/src/reader/chunk_reader.cc index b150f7851..46f455bb4 100644 --- a/cpp/src/reader/chunk_reader.cc +++ b/cpp/src/reader/chunk_reader.cc @@ -422,8 +422,6 @@ int ChunkReader::i32_DECODE_TYPED_TV_INTO_TSBLOCK(ByteStream& time_in, row_appender.backoff_add_row(); continue; } else { - /*std::cout << "decoder: time=" << time << ", value=" << value - * << std::endl;*/ row_appender.append(0, (char*)&time, sizeof(time)); row_appender.append(1, (char*)&value, sizeof(value)); } @@ -432,6 +430,320 @@ int ChunkReader::i32_DECODE_TYPED_TV_INTO_TSBLOCK(ByteStream& time_in, return ret; } +int ChunkReader::i32_DECODE_TV_BATCH(ByteStream& time_in, ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + int32_t values[BATCH]; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + value_decoder_->skip_int32(block_count, skipped, value_in); + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + int value_count = 0; + + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + 
} + if (time_count == 0) break; + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + int skipped = 0; + value_decoder_->skip_int32(time_count, skipped, value_in); + continue; + } + + if (RET_FAIL(value_decoder_->read_batch_int32(values, BATCH, + value_count, value_in))) { + break; + } + + for (int i = 0; i < time_count; ++i) { + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + continue; + } + if (filter != nullptr && !block_all_pass && + !filter->satisfy(times[i], (int64_t)values[i])) { + continue; + } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&values[i], sizeof(int32_t)); + } + if (ret != E_OK) break; + } + return ret; +} + +int ChunkReader::i64_DECODE_TV_BATCH(ByteStream& time_in, ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + int64_t values[BATCH]; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + value_decoder_->skip_int64(block_count, skipped, value_in); + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + int value_count = 0; + + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; +
} + if (time_count == 0) break; + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + int skipped = 0; + value_decoder_->skip_int64(time_count, skipped, value_in); + continue; + } + + if (RET_FAIL(value_decoder_->read_batch_int64(values, BATCH, + value_count, value_in))) { + break; + } + + for (int i = 0; i < time_count; ++i) { + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + continue; + } + if (filter != nullptr && !block_all_pass && + !filter->satisfy(times[i], values[i])) { + continue; + } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&values[i], sizeof(int64_t)); + } + if (ret != E_OK) break; + } + return ret; +} + +int ChunkReader::float_DECODE_TV_BATCH(ByteStream& time_in, + ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + float values[BATCH]; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + value_decoder_->skip_float(block_count, skipped, value_in); + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + int value_count = 0; + + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + } + if
(time_count == 0) break; + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr && !block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + int skipped = 0; + value_decoder_->skip_float(time_count, skipped, value_in); + continue; + } + + if (RET_FAIL(value_decoder_->read_batch_float(values, BATCH, + value_count, value_in))) { + break; + } + + for (int i = 0; i < time_count; ++i) { + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + continue; + } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&values[i], sizeof(float)); + } + if (ret != E_OK) break; + } + return ret; +} + +int ChunkReader::double_DECODE_TV_BATCH(ByteStream& time_in, + ByteStream& value_in, + RowAppender& row_appender, + Filter* filter) { + int ret = E_OK; + const int BATCH = 129; + int64_t times[BATCH]; + double values[BATCH]; + + while (time_decoder_->has_remaining(time_in)) { + if (row_appender.remaining() < (uint32_t)BATCH) { + ret = E_OVERFLOW; + break; + } + + // Block-level time filter check + bool block_all_pass = false; + if (filter != nullptr) { + int64_t block_min, block_max; + int block_count; + if (time_decoder_->peek_next_block_range_int64( + time_in, block_min, block_max, block_count)) { + if (!filter->satisfy_start_end_time(block_min, block_max)) { + int skipped = 0; + time_decoder_->skip_peeked_block_int64(time_in, skipped); + value_decoder_->skip_double(block_count, skipped, value_in); + continue; + } + if (filter->contain_start_end_time(block_min, block_max)) { + block_all_pass = true; + } + } + } + + int time_count = 0; + int value_count = 0; + + if (RET_FAIL(time_decoder_->read_batch_int64(times, BATCH, time_count, + time_in))) { + break; + } + if (time_count == 0) break; + + bool time_mask[BATCH]; + int pass_count = time_count; + if (filter != nullptr &&
!block_all_pass) { + pass_count = + filter->satisfy_batch_time(times, time_count, time_mask); + } + + if (pass_count == 0) { + int skipped = 0; + value_decoder_->skip_double(time_count, skipped, value_in); + continue; + } + + if (RET_FAIL(value_decoder_->read_batch_double( + values, BATCH, value_count, value_in))) { + break; + } + + for (int i = 0; i < time_count; ++i) { + if (filter != nullptr && !block_all_pass && !time_mask[i]) { + continue; + } + if (UNLIKELY(!row_appender.add_row())) { + ret = E_OVERFLOW; + break; + } + row_appender.append(0, (char*)&times[i], sizeof(int64_t)); + row_appender.append(1, (char*)&values[i], sizeof(double)); + } + if (ret != E_OK) break; + } + return ret; +} + int ChunkReader::STRING_DECODE_TYPED_TV_INTO_TSBLOCK(ByteStream& time_in, ByteStream& value_in, RowAppender& row_appender, @@ -472,23 +784,21 @@ int ChunkReader::decode_tv_buf_into_tsblock_by_datatype(ByteStream& time_in, break; case common::DATE: case common::INT32: - // DECODE_TYPED_TV_INTO_TSBLOCK(int32_t, int32, time_in_, value_in_, - // row_appender); - ret = i32_DECODE_TYPED_TV_INTO_TSBLOCK(time_in_, value_in_, - row_appender, filter); + ret = + i32_DECODE_TV_BATCH(time_in_, value_in_, row_appender, filter); break; case TIMESTAMP: case common::INT64: - DECODE_TYPED_TV_INTO_TSBLOCK(int64_t, int64, time_in_, value_in_, - row_appender); + ret = + i64_DECODE_TV_BATCH(time_in_, value_in_, row_appender, filter); break; case common::FLOAT: - DECODE_TYPED_TV_INTO_TSBLOCK(float, float, time_in_, value_in_, - row_appender); + ret = float_DECODE_TV_BATCH(time_in_, value_in_, row_appender, + filter); break; case common::DOUBLE: - DECODE_TYPED_TV_INTO_TSBLOCK(double, double, time_in_, value_in_, - row_appender); + ret = double_DECODE_TV_BATCH(time_in_, value_in_, row_appender, + filter); break; case common::TEXT: case common::BLOB: diff --git a/cpp/src/reader/chunk_reader.h b/cpp/src/reader/chunk_reader.h index 3acd9c3cf..a1196c330 100644 --- a/cpp/src/reader/chunk_reader.h +++
b/cpp/src/reader/chunk_reader.h @@ -105,6 +105,20 @@ class ChunkReader : public IChunkReader { common::ByteStream& value_in, common::RowAppender& row_appender, Filter* filter); + int i32_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, Filter* filter); + int i64_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, Filter* filter); + int float_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, + Filter* filter); + int double_DECODE_TV_BATCH(common::ByteStream& time_in, + common::ByteStream& value_in, + common::RowAppender& row_appender, + Filter* filter); int STRING_DECODE_TYPED_TV_INTO_TSBLOCK(common::ByteStream& time_in, common::ByteStream& value_in, common::RowAppender& row_appender, @@ -131,7 +145,7 @@ class ChunkReader : public IChunkReader { * also refer to offset within the chunk (including chunk header). * It advanced by step of a page header or a page tv data. 
*/ - common::ByteStream in_stream_{common::MOD_CHUNK_READER}; + common::ByteStream in_stream_; int32_t file_data_buf_size_; uint32_t chunk_visit_offset_; @@ -141,8 +155,8 @@ class ChunkReader : public IChunkReader { Decoder* time_decoder_; Decoder* value_decoder_; - common::ByteStream time_in_{common::MOD_CHUNK_READER}; - common::ByteStream value_in_{common::MOD_CHUNK_READER}; + common::ByteStream time_in_; + common::ByteStream value_in_; char* uncompressed_buf_; }; diff --git a/cpp/src/reader/filter/and_filter.h b/cpp/src/reader/filter/and_filter.h index 0d01000f8..fd70c163f 100644 --- a/cpp/src/reader/filter/and_filter.h +++ b/cpp/src/reader/filter/and_filter.h @@ -50,6 +50,18 @@ class AndFilter : public BinaryFilter { right_->contain_start_end_time(start_time, end_time); } + int satisfy_batch_time(const int64_t* times, int count, bool* mask) { + bool mask_right[129]; + left_->satisfy_batch_time(times, count, mask); + right_->satisfy_batch_time(times, count, mask_right); + int pass = 0; + for (int i = 0; i < count; ++i) { + mask[i] = mask[i] && mask_right[i]; + if (mask[i]) ++pass; + } + return pass; + } + std::vector* get_time_ranges() { std::vector* result = new std::vector(); std::vector* left_time_ranges = left_->get_time_ranges(); diff --git a/cpp/src/reader/filter/filter.h b/cpp/src/reader/filter/filter.h index f39dddbae..e53992308 100644 --- a/cpp/src/reader/filter/filter.h +++ b/cpp/src/reader/filter/filter.h @@ -63,6 +63,20 @@ class Filter { ASSERT(false); return nullptr; } + + // Batch time filter: evaluate time filter on an array of timestamps. + // Writes true/false into @mask for each element. + // Returns the number of elements that passed (mask[i] == true). + // Default: scalar fallback using satisfy_start_end_time. 
+ virtual int satisfy_batch_time(const int64_t* times, int count, + bool* mask) { + int pass = 0; + for (int i = 0; i < count; ++i) { + mask[i] = satisfy_start_end_time(times[i], times[i]); + if (mask[i]) ++pass; + } + return pass; + } }; } // namespace storage diff --git a/cpp/src/reader/filter/or_filter.h b/cpp/src/reader/filter/or_filter.h index 1d4aa6aa7..10cfde164 100644 --- a/cpp/src/reader/filter/or_filter.h +++ b/cpp/src/reader/filter/or_filter.h @@ -50,6 +50,18 @@ class OrFilter : public BinaryFilter { right_->contain_start_end_time(start_time, end_time); } + int satisfy_batch_time(const int64_t* times, int count, bool* mask) { + bool mask_right[129]; + left_->satisfy_batch_time(times, count, mask); + right_->satisfy_batch_time(times, count, mask_right); + int pass = 0; + for (int i = 0; i < count; ++i) { + mask[i] = mask[i] || mask_right[i]; + if (mask[i]) ++pass; + } + return pass; + } + std::vector* get_time_ranges() { std::vector* result = new std::vector(); std::vector* left_time_ranges = left_->get_time_ranges(); diff --git a/cpp/src/reader/filter/time_operator.cc b/cpp/src/reader/filter/time_operator.cc index 19f33b599..3cc40e7cb 100644 --- a/cpp/src/reader/filter/time_operator.cc +++ b/cpp/src/reader/filter/time_operator.cc @@ -18,9 +18,17 @@ */ #include "time_operator.h" +#include + #include "common/statistic.h" #include "utils/storage_utils.h" +#if defined(__ARM_NEON) +#include <arm_neon.h> +#elif defined(ENABLE_SIMD) +#include "simde/x86/avx2.h" +#endif + namespace storage { TimeBetween::TimeBetween(int64_t value1, int64_t value2, bool not_between) @@ -308,4 +316,269 @@ std::vector* TimeLtEq::get_time_ranges() { return result; } +// ============================================================================ +// SIMD batch time filter implementations +// ============================================================================ + +// Helper: extract 4-bit movemask from 256-bit comparison result (4 x i64) +#if !defined(__ARM_NEON) && defined(ENABLE_SIMD)
+static inline int simd_movemask_epi64(simde__m256i v) { + // movemask_pd reinterprets as double and checks sign bit = high bit of each + // 64-bit lane + return simde_mm256_movemask_pd(simde_mm256_castsi256_pd(v)); +} +#endif + +int TimeGt::satisfy_batch_time(const int64_t* times, int count, bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vval = vdupq_n_s64(value_); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t cmp = vcgtq_s64(vt, vval); + mask[i] = vgetq_lane_u64(cmp, 0) != 0; + mask[i + 1] = vgetq_lane_u64(cmp, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vval = simde_mm256_set1_epi64x(value_); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + // time > value_ => cmpgt(time, value_) + simde__m256i cmp = simde_mm256_cmpgt_epi64(vt, vval); + int bits = simd_movemask_epi64(cmp); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + mask[i] = value_ < times[i]; + if (mask[i]) ++pass; + } + return pass; +} + +int TimeGtEq::satisfy_batch_time(const int64_t* times, int count, bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vval = vdupq_n_s64(value_); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t cmp = vcgeq_s64(vt, vval); + mask[i] = vgetq_lane_u64(cmp, 0) != 0; + mask[i + 1] = vgetq_lane_u64(cmp, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vval = simde_mm256_set1_epi64x(value_); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + // time >= value_ => NOT(cmpgt(value_, time)) + simde__m256i cmp = simde_mm256_cmpgt_epi64(vval, vt); + simde__m256i ncmp = + simde_mm256_xor_si256(cmp, simde_mm256_set1_epi64x((int64_t)-1)); + int bits = 
simd_movemask_epi64(ncmp); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + mask[i] = value_ <= times[i]; + if (mask[i]) ++pass; + } + return pass; +} + +int TimeLt::satisfy_batch_time(const int64_t* times, int count, bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vval = vdupq_n_s64(value_); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t cmp = vcltq_s64(vt, vval); + mask[i] = vgetq_lane_u64(cmp, 0) != 0; + mask[i + 1] = vgetq_lane_u64(cmp, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vval = simde_mm256_set1_epi64x(value_); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + // time < value_ => cmpgt(value_, time) + simde__m256i cmp = simde_mm256_cmpgt_epi64(vval, vt); + int bits = simd_movemask_epi64(cmp); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + mask[i] = value_ > times[i]; + if (mask[i]) ++pass; + } + return pass; +} + +int TimeLtEq::satisfy_batch_time(const int64_t* times, int count, bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vval = vdupq_n_s64(value_); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t cmp = vcleq_s64(vt, vval); + mask[i] = vgetq_lane_u64(cmp, 0) != 0; + mask[i + 1] = vgetq_lane_u64(cmp, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vval = simde_mm256_set1_epi64x(value_); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + // time <= value_ => NOT(cmpgt(time, value_)) + simde__m256i cmp = simde_mm256_cmpgt_epi64(vt, vval); + simde__m256i ncmp = + simde_mm256_xor_si256(cmp, simde_mm256_set1_epi64x((int64_t)-1)); + int 
bits = simd_movemask_epi64(ncmp); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + mask[i] = value_ >= times[i]; + if (mask[i]) ++pass; + } + return pass; +} + +int TimeEq::satisfy_batch_time(const int64_t* times, int count, bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vval = vdupq_n_s64(value_); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t cmp = vceqq_s64(vt, vval); + mask[i] = vgetq_lane_u64(cmp, 0) != 0; + mask[i + 1] = vgetq_lane_u64(cmp, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vval = simde_mm256_set1_epi64x(value_); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + simde__m256i cmp = simde_mm256_cmpeq_epi64(vt, vval); + int bits = simd_movemask_epi64(cmp); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + mask[i] = value_ == times[i]; + if (mask[i]) ++pass; + } + return pass; +} + +int TimeNotEq::satisfy_batch_time(const int64_t* times, int count, bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vval = vdupq_n_s64(value_); + uint64x2_t ones = vdupq_n_u64(UINT64_MAX); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t cmp = veorq_u64(vceqq_s64(vt, vval), ones); + mask[i] = vgetq_lane_u64(cmp, 0) != 0; + mask[i + 1] = vgetq_lane_u64(cmp, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vval = simde_mm256_set1_epi64x(value_); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + simde__m256i eq = simde_mm256_cmpeq_epi64(vt, vval); + simde__m256i neq = + simde_mm256_xor_si256(eq, simde_mm256_set1_epi64x((int64_t)-1)); + int bits = 
simd_movemask_epi64(neq); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + mask[i] = value_ != times[i]; + if (mask[i]) ++pass; + } + return pass; +} + +int TimeBetween::satisfy_batch_time(const int64_t* times, int count, + bool* mask) { + int pass = 0; + int i = 0; +#if defined(__ARM_NEON) + int64x2_t vlo = vdupq_n_s64(value1_); + int64x2_t vhi = vdupq_n_s64(value2_); + uint64x2_t ones = vdupq_n_u64(UINT64_MAX); + for (; i + 1 < count; i += 2) { + int64x2_t vt = vld1q_s64(times + i); + uint64x2_t ge_lo = vcgeq_s64(vt, vlo); + uint64x2_t le_hi = vcleq_s64(vt, vhi); + uint64x2_t between = vandq_u64(ge_lo, le_hi); + uint64x2_t result = not_ ? veorq_u64(between, ones) : between; + mask[i] = vgetq_lane_u64(result, 0) != 0; + mask[i + 1] = vgetq_lane_u64(result, 1) != 0; + pass += mask[i] + mask[i + 1]; + } +#elif defined(ENABLE_SIMD) + simde__m256i vlo = simde_mm256_set1_epi64x(value1_); + simde__m256i vhi = simde_mm256_set1_epi64x(value2_); + simde__m256i ones = simde_mm256_set1_epi64x((int64_t)-1); + for (; i + 3 < count; i += 4) { + simde__m256i vt = + simde_mm256_loadu_si256((const simde__m256i*)(times + i)); + // time >= lo => NOT(cmpgt(lo, time)) + simde__m256i ge_lo = + simde_mm256_xor_si256(simde_mm256_cmpgt_epi64(vlo, vt), ones); + // time <= hi => NOT(cmpgt(time, hi)) + simde__m256i le_hi = + simde_mm256_xor_si256(simde_mm256_cmpgt_epi64(vt, vhi), ones); + simde__m256i between = simde_mm256_and_si256(ge_lo, le_hi); + simde__m256i result = + not_ ? simde_mm256_xor_si256(between, ones) : between; + int bits = simd_movemask_epi64(result); + for (int j = 0; j < 4; ++j) { + mask[i + j] = (bits >> j) & 1; + pass += mask[i + j]; + } + } +#endif + for (; i < count; ++i) { + bool in_range = (value1_ <= times[i]) && (times[i] <= value2_); + mask[i] = not_ ? 
!in_range : in_range; + if (mask[i]) ++pass; + } + return pass; +} + } // namespace storage diff --git a/cpp/src/reader/filter/time_operator.h b/cpp/src/reader/filter/time_operator.h index 29930b88a..f972a4259 100644 --- a/cpp/src/reader/filter/time_operator.h +++ b/cpp/src/reader/filter/time_operator.h @@ -47,6 +47,9 @@ class TimeBetween : public Filter { bool contain_start_end_time(int64_t start_time, int64_t end_time); std::vector* get_time_ranges(); + + int satisfy_batch_time(const int64_t* times, int count, bool* mask); + FilterType get_filter_type() { return type_; } private: @@ -99,6 +102,8 @@ class TimeEq : public Filter { std::vector* get_time_ranges(); + int satisfy_batch_time(const int64_t* times, int count, bool* mask); + FilterType get_filter_type() { return type_; } private: @@ -122,6 +127,9 @@ class TimeNotEq : public Filter { bool contain_start_end_time(int64_t start_time, int64_t end_time); std::vector* get_time_ranges(); + + int satisfy_batch_time(const int64_t* times, int count, bool* mask); + FilterType get_filter_type() { return type_; } private: @@ -146,6 +154,8 @@ class TimeGt : public Filter { std::vector* get_time_ranges(); + int satisfy_batch_time(const int64_t* times, int count, bool* mask); + FilterType get_filter_type() { return type_; } private: @@ -169,6 +179,9 @@ class TimeGtEq : public Filter { bool contain_start_end_time(int64_t start_time, int64_t end_time); std::vector* get_time_ranges(); + + int satisfy_batch_time(const int64_t* times, int count, bool* mask); + void reset_value(int64_t val) { value_ = val; } FilterType get_filter_type() { return type_; } @@ -194,6 +207,8 @@ class TimeLt : public Filter { std::vector* get_time_ranges(); + int satisfy_batch_time(const int64_t* times, int count, bool* mask); + FilterType get_filter_type() { return type_; } private: @@ -217,6 +232,9 @@ class TimeLtEq : public Filter { bool contain_start_end_time(int64_t start_time, int64_t end_time); std::vector* get_time_ranges(); + + int 
satisfy_batch_time(const int64_t* times, int count, bool* mask); + FilterType get_filter_type() { return type_; } private: diff --git a/cpp/src/reader/qds_without_timegenerator.cc b/cpp/src/reader/qds_without_timegenerator.cc index 474e13b77..d8129ce0e 100644 --- a/cpp/src/reader/qds_without_timegenerator.cc +++ b/cpp/src/reader/qds_without_timegenerator.cc @@ -19,7 +19,6 @@ #include "qds_without_timegenerator.h" -#include "utils/errno_define.h" #include "utils/util_define.h" using namespace common; @@ -67,25 +66,17 @@ int QDSWithoutTimeGenerator::init_internal(TsFileIOReader* io_reader, TsFileSeriesScanIterator* ssi = nullptr; ret = io_reader_->alloc_ssi(paths[i].device_id_, paths[i].measurement_, ssi, pa_, global_time_filter); - if (ret == E_MEASUREMENT_NOT_EXIST || ret == E_DEVICE_NOT_EXIST || - ret == E_NOT_EXIST) { - continue; - } - if (ret != E_OK) { - for (size_t j = 0; j < ssi_vec_.size(); j++) { - io_reader_->revert_ssi(ssi_vec_[j]); - } - ssi_vec_.clear(); + if (ret != 0) { return ret; + } else { + index_lookup_.insert({paths[i].measurement_, i + 1}); + if (paths[i].full_path_ != paths[i].measurement_) { + index_lookup_.insert({paths[i].full_path_, i + 1}); + } + ssi_vec_.push_back(ssi); + valid_paths.push_back(paths[i]); + column_names.push_back(paths[i].full_path_); } - size_t col_idx = ssi_vec_.size(); - index_lookup_.insert({paths[i].measurement_, col_idx + 1}); - if (paths[i].full_path_ != paths[i].measurement_) { - index_lookup_.insert({paths[i].full_path_, col_idx + 1}); - } - ssi_vec_.push_back(ssi); - valid_paths.push_back(paths[i]); - column_names.push_back(paths[i].full_path_); } size_t path_count = valid_paths.size(); @@ -144,7 +135,6 @@ void QDSWithoutTimeGenerator::close() { io_reader_->revert_ssi(ssi); } ssi_vec_.clear(); - tsblocks_.clear(); if (qe_ != nullptr) { delete qe_; qe_ = nullptr; @@ -177,14 +167,11 @@ int QDSWithoutTimeGenerator::next(bool& has_next) { uint32_t len = 0; uint32_t idx = heap_time_.begin()->second; - bool 
is_null_val = false; auto val_datatype = value_iters_[idx]->get_data_type(); - void* val_ptr = value_iters_[idx]->read(&len, &is_null_val); + void* val_ptr = value_iters_[idx]->read(&len); if (!skip_row) { - if (!is_null_val) { - row_record_->get_field(idx + 1)->set_value( - val_datatype, val_ptr, len, pa_); - } + row_record_->get_field(idx + 1)->set_value(val_datatype, + val_ptr, len, pa_); } value_iters_[idx]->next(); @@ -232,14 +219,10 @@ int QDSWithoutTimeGenerator::next(bool& has_next) { std::multimap::iterator iter = heap_time_.find(time); for (uint32_t i = 0; i < count; ++i) { uint32_t len = 0; - bool is_null_val = false; auto val_datatype = value_iters_[iter->second]->get_data_type(); - void* val_ptr = - value_iters_[iter->second]->read(&len, &is_null_val); - if (!is_null_val) { - row_record_->get_field(iter->second + 1) - ->set_value(val_datatype, val_ptr, len, pa_); - } + void* val_ptr = value_iters_[iter->second]->read(&len); + row_record_->get_field(iter->second + 1) + ->set_value(val_datatype, val_ptr, len, pa_); value_iters_[iter->second]->next(); if (!time_iters_[iter->second]->end()) { int64_t timev = diff --git a/cpp/src/reader/qds_without_timegenerator.h b/cpp/src/reader/qds_without_timegenerator.h index 1d929e575..9bb9d1a81 100644 --- a/cpp/src/reader/qds_without_timegenerator.h +++ b/cpp/src/reader/qds_without_timegenerator.h @@ -31,6 +31,8 @@ namespace storage { class QDSWithoutTimeGenerator : public ResultSet { public: + using ResultSet::get_next_tsblock; + QDSWithoutTimeGenerator() : result_set_metadata_(nullptr), io_reader_(nullptr), diff --git a/cpp/src/reader/result_set.h b/cpp/src/reader/result_set.h index 1f1653603..c230e0a84 100644 --- a/cpp/src/reader/result_set.h +++ b/cpp/src/reader/result_set.h @@ -21,7 +21,6 @@ #define READER_QUERY_DATA_SET_H #include -#include #include #include @@ -306,7 +305,7 @@ inline ResultSetIterator ResultSet::iterator() { return ResultSetIterator(this); } -static MAYBE_UNUSED void print_table_result_set( 
+MAYBE_UNUSED static void print_table_result_set( storage::ResultSet* table_result_set) { if (table_result_set == nullptr) { std::cout << "TableResultSet is nullptr" << std::endl; diff --git a/cpp/src/reader/table_query_executor.cc b/cpp/src/reader/table_query_executor.cc index d5145104d..c23ffc0f4 100644 --- a/cpp/src/reader/table_query_executor.cc +++ b/cpp/src/reader/table_query_executor.cc @@ -157,9 +157,8 @@ int TableQueryExecutor::query(const std::string& table_name, return common::E_UNSUPPORTED_ORDER; } assert(tsblock_reader != nullptr); - ret_qds = - new TableResultSet(std::move(tsblock_reader), lower_case_column_names, - data_types, return_mode_); + ret_qds = new TableResultSet(std::move(tsblock_reader), + lower_case_column_names, data_types); return ret; } diff --git a/cpp/src/reader/table_result_set.cc b/cpp/src/reader/table_result_set.cc index 81b58ce68..d0554fd97 100644 --- a/cpp/src/reader/table_result_set.cc +++ b/cpp/src/reader/table_result_set.cc @@ -79,10 +79,9 @@ int TableResultSet::next(bool& has_next) { if (!null) { row_record_->get_field(i)->set_value( row_iterator_->get_data_type(i), value, len, pa_); - row_iterator_->next(i); } } - row_iterator_->update_row_id(); + row_iterator_->next(); } return ret; } @@ -138,7 +137,13 @@ int TableResultSet::get_next_tsblock(common::TsBlock*& block) { } void TableResultSet::close() { - tsblock_reader_->close(); + if (closed_) { + return; + } + closed_ = true; + if (tsblock_reader_) { + tsblock_reader_->close(); + } pa_.destroy(); if (row_record_) { delete row_record_; @@ -150,4 +155,4 @@ void TableResultSet::close() { } } -} // namespace storage \ No newline at end of file +} // namespace storage diff --git a/cpp/src/reader/table_result_set.h b/cpp/src/reader/table_result_set.h index 072a63f6f..d9f171678 100644 --- a/cpp/src/reader/table_result_set.h +++ b/cpp/src/reader/table_result_set.h @@ -58,6 +58,7 @@ class TableResultSet : public ResultSet { std::vector column_names_; std::vector data_types_; const 
int return_mode_; + bool closed_ = false; }; } // namespace storage -#endif // TABLE_RESULT_SET_H \ No newline at end of file +#endif // TABLE_RESULT_SET_H diff --git a/cpp/src/reader/task/device_query_task.cc b/cpp/src/reader/task/device_query_task.cc index c7e7091ff..187a6f3a9 100644 --- a/cpp/src/reader/task/device_query_task.cc +++ b/cpp/src/reader/task/device_query_task.cc @@ -33,10 +33,6 @@ DeviceQueryTask* DeviceQueryTask::create_device_query_task( return task; } -DeviceQueryTask::~DeviceQueryTask() { - if (index_root_) { - index_root_->~MetaIndexNode(); - } -} +DeviceQueryTask::~DeviceQueryTask() = default; } // namespace storage diff --git a/cpp/src/reader/tsfile_reader.cc b/cpp/src/reader/tsfile_reader.cc index cabf02b08..583b8a0c9 100644 --- a/cpp/src/reader/tsfile_reader.cc +++ b/cpp/src/reader/tsfile_reader.cc @@ -25,12 +25,76 @@ using namespace common; using namespace storage; +namespace { + +struct DeviceMetaEntry { + std::shared_ptr device_id; + int64_t start_offset; + int64_t end_offset; +}; + +int get_all_device_entries(std::vector& entries, + std::shared_ptr index_node, + ReadFile* read_file, PageArena& pa) { + int ret = E_OK; + if (index_node == nullptr) { + return ret; + } + if (index_node->node_type_ == LEAF_DEVICE) { + for (size_t i = 0; i < index_node->children_.size(); i++) { + DeviceMetaEntry entry; + entry.device_id = index_node->children_[i]->get_device_id(); + entry.start_offset = index_node->children_[i]->get_offset(); + entry.end_offset = (i + 1 < index_node->children_.size()) + ? 
index_node->children_[i + 1]->get_offset() + : index_node->end_offset_; + entries.push_back(entry); + } + } else { + for (size_t idx = 0; idx < index_node->children_.size(); idx++) { + auto meta_index_entry = index_node->children_[idx]; + int64_t start_offset = meta_index_entry->get_offset(); + int64_t end_offset = index_node->end_offset_; + if (idx + 1 < index_node->children_.size()) { + end_offset = index_node->children_[idx + 1]->get_offset(); + } + ASSERT(end_offset - start_offset > 0); + const int32_t read_size = (int32_t)(end_offset - start_offset); + int32_t ret_read_len = 0; + char* data_buf = (char*)pa.alloc(read_size); + void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { + return E_OOM; + } + auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); + auto top_node = std::shared_ptr( + top_node_ptr, [](MetaIndexNode* ptr) { + if (ptr) { + ptr->~MetaIndexNode(); + } + }); + if (RET_FAIL(read_file->read(start_offset, data_buf, read_size, + ret_read_len))) { + } else if (RET_FAIL(top_node->device_deserialize_from(data_buf, + read_size))) { + } else { + ret = get_all_device_entries(entries, top_node, read_file, pa); + } + if (ret != E_OK) { + return ret; + } + } + } + return ret; +} + +} // namespace + namespace storage { TsFileReader::TsFileReader() : read_file_(nullptr), tsfile_executor_(nullptr), - table_query_executor_(nullptr), - table_query_executor_batch_size_(0) { + table_query_executor_(nullptr) { tsfile_reader_meta_pa_.init(512, MOD_TSFILE_READER); } @@ -58,7 +122,6 @@ int TsFileReader::close() { delete table_query_executor_; table_query_executor_ = nullptr; } - table_query_executor_batch_size_ = 0; if (read_file_ != nullptr) { read_file_->close(); delete read_file_; @@ -67,22 +130,6 @@ int TsFileReader::close() { return ret; } -int TsFileReader::ensure_table_query_executor(int batch_size) { - if (table_query_executor_ != nullptr && - table_query_executor_batch_size_ == batch_size) { - 
return E_OK; - } - - if (table_query_executor_ != nullptr) { - delete table_query_executor_; - table_query_executor_ = nullptr; - } - - table_query_executor_ = new TableQueryExecutor(read_file_, batch_size); - table_query_executor_batch_size_ = batch_size; - return E_OK; -} - int TsFileReader::query(QueryExpression* qe, ResultSet*& ret_qds) { return tsfile_executor_->execute(qe, ret_qds); } @@ -128,7 +175,9 @@ int TsFileReader::query(const std::string& table_name, } Filter* time_filter = new TimeBetween(start_time, end_time, false); - ensure_table_query_executor(batch_size); + if (table_query_executor_ == nullptr) { + table_query_executor_ = new TableQueryExecutor(read_file_, batch_size); + } ret = table_query_executor_->query(to_lower(table_name), columns_names, time_filter, tag_filter, nullptr, result_set); @@ -151,8 +200,7 @@ int TsFileReader::queryByRow(std::vector& path_list, int offset, int TsFileReader::queryByRow(const std::string& table_name, const std::vector& column_names, - int offset, int limit, ResultSet*& result_set, - Filter* tag_filter, int batch_size) { + int offset, int limit, ResultSet*& result_set) { int ret = E_OK; TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); if (tsfile_meta == nullptr) { @@ -163,9 +211,12 @@ int TsFileReader::queryByRow(const std::string& table_name, return E_TABLE_NOT_EXIST; } - ensure_table_query_executor(batch_size); + if (table_query_executor_ == nullptr) { + table_query_executor_ = new TableQueryExecutor(read_file_); + } ret = table_query_executor_->query(to_lower(table_name), column_names, - /*time_filter=*/nullptr, tag_filter, + /*time_filter=*/nullptr, + /*tag_filter=*/nullptr, /*field_filter=*/nullptr, offset, limit, result_set); return ret; @@ -242,7 +293,9 @@ int TsFileReader::query_table_on_tree( columns_names[i] = "col_" + std::to_string(i); } Filter* time_filter = new TimeBetween(star_time, end_time, false); - ensure_table_query_executor(-1); + if (table_query_executor_ == nullptr) { + 
table_query_executor_ = new TableQueryExecutor(read_file_); + } ret = table_query_executor_->query_on_tree( satisfied_device_ids, columns_names, measurement_names_to_query, time_filter, result_set); @@ -346,16 +399,9 @@ int TsFileReader::get_timeseries_schema( device_id, timeseries_indexs, pa))) { } else { for (auto timeseries_index : timeseries_indexs) { - auto* aligned_timeseries_index = - dynamic_cast(timeseries_index); - auto data_type = - aligned_timeseries_index != nullptr && - aligned_timeseries_index->value_ts_idx_ != nullptr - ? aligned_timeseries_index->value_ts_idx_->get_data_type() - : timeseries_index->get_data_type(); MeasurementSchema ms( timeseries_index->get_measurement_name().to_std_string(), - data_type); + timeseries_index->get_data_type()); result.push_back(ms); } } @@ -367,8 +413,6 @@ int TsFileReader::get_timeseries_metadata_impl( std::vector>& result) { int ret = E_OK; std::vector timeseries_indexs; - tsfile_reader_meta_pa_.init(512, MOD_TSFILE_READER); - // Pointers are owned by tsfile_reader_meta_pa_; shared_ptr must not delete auto noop_deleter = [](ITimeseriesIndex*) {}; if (RET_FAIL( tsfile_executor_->get_tsfile_io_reader() @@ -397,13 +441,36 @@ DeviceTimeseriesMetadataMap TsFileReader::get_timeseries_metadata( } DeviceTimeseriesMetadataMap TsFileReader::get_timeseries_metadata() { - // Collect metadata for all devices present in the file DeviceTimeseriesMetadataMap result; - auto device_ids = get_all_device_ids(); - for (const auto& device_id : device_ids) { - std::vector> list; - if (get_timeseries_metadata_impl(device_id, list) == E_OK) { - result.insert(std::make_pair(device_id, std::move(list))); + TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); + if (tsfile_meta == nullptr) { + return result; + } + + PageArena pa; + pa.init(512, MOD_TSFILE_READER); + std::vector entries; + for (auto& table_entry : tsfile_meta->table_metadata_index_node_map_) { + if (get_all_device_entries(entries, table_entry.second, read_file_, + 
pa) != E_OK) { + return result; + } + } + + auto noop_deleter = [](ITimeseriesIndex*) {}; + for (auto& device_entry : entries) { + std::vector raw_ts_indexes; + if (tsfile_executor_->get_tsfile_io_reader() + ->get_device_timeseries_meta_by_offset( + device_entry.start_offset, device_entry.end_offset, + raw_ts_indexes, tsfile_reader_meta_pa_) == E_OK) { + std::vector> list; + for (auto ts_idx : raw_ts_indexes) { + list.emplace_back( + std::shared_ptr(ts_idx, noop_deleter)); + } + result.insert( + std::make_pair(device_entry.device_id, std::move(list))); } } return result; diff --git a/cpp/src/reader/tsfile_reader.h b/cpp/src/reader/tsfile_reader.h index 19d83ec61..324d202d3 100644 --- a/cpp/src/reader/tsfile_reader.h +++ b/cpp/src/reader/tsfile_reader.h @@ -143,13 +143,11 @@ class TsFileReader { * @param offset Number of leading rows to skip (>= 0). * @param limit Maximum rows to return. < 0 means unlimited. * @param[out] result_set The result set containing query results. - * @param tag_filter Optional tag filter for filtering by tag columns. * @return Returns 0 on success, or a non-zero error code on failure. 
*/ int queryByRow(const std::string& table_name, const std::vector& column_names, int offset, - int limit, ResultSet*& result_set, - Filter* tag_filter = nullptr, int batch_size = 0); + int limit, ResultSet*& result_set); int query_table_on_tree(const std::vector& measurement_names, int64_t star_time, int64_t end_time, @@ -233,7 +231,6 @@ class TsFileReader { std::vector> get_all_table_schemas(); private: - int ensure_table_query_executor(int batch_size); int get_timeseries_metadata_impl( std::shared_ptr device_id, std::vector>& result); @@ -243,7 +240,6 @@ class TsFileReader { storage::ReadFile* read_file_; storage::TsFileExecutor* tsfile_executor_; storage::TableQueryExecutor* table_query_executor_; - int table_query_executor_batch_size_; common::PageArena tsfile_reader_meta_pa_; }; diff --git a/cpp/src/reader/tsfile_series_scan_iterator.cc b/cpp/src/reader/tsfile_series_scan_iterator.cc index 1d666bfc0..0f4f16c64 100644 --- a/cpp/src/reader/tsfile_series_scan_iterator.cc +++ b/cpp/src/reader/tsfile_series_scan_iterator.cc @@ -19,6 +19,13 @@ #include "reader/tsfile_series_scan_iterator.h" +#include + +#include "common/global.h" +#ifdef ENABLE_THREADS +#include "common/thread_pool.h" +#endif + using namespace common; namespace storage { @@ -34,6 +41,12 @@ void TsFileSeriesScanIterator::destroy() { delete tsblock_; tsblock_ = nullptr; } +#ifdef ENABLE_THREADS + if (decode_pool_ != nullptr) { + delete decode_pool_; + decode_pool_ = nullptr; + } +#endif } bool TsFileSeriesScanIterator::should_skip_chunk_by_time( @@ -60,30 +73,6 @@ bool TsFileSeriesScanIterator::should_skip_chunk_by_offset(ChunkMeta* cm) { return false; } -bool TsFileSeriesScanIterator::should_skip_aligned_chunk_by_offset( - ChunkMeta* time_cm, ChunkMeta* value_cm) { - if (row_offset_ <= 0) { - return false; - } - if (time_cm->statistic_ == nullptr || value_cm->statistic_ == nullptr) { - return false; - } - int32_t tc = time_cm->statistic_->count_; - int32_t vc = value_cm->statistic_->count_; - if (tc 
<= 0 || vc <= 0) { - return false; - } - if (tc != vc) { - return false; - } - int32_t count = tc; - if (row_offset_ >= count) { - row_offset_ -= count; - return true; - } - return false; -} - int TsFileSeriesScanIterator::get_next(TsBlock*& ret_tsblock, bool alloc, Filter* oneshoot_filter, int64_t min_time_hint) { @@ -91,77 +80,95 @@ int TsFileSeriesScanIterator::get_next(TsBlock*& ret_tsblock, bool alloc, Filter* filter = (oneshoot_filter != nullptr) ? oneshoot_filter : time_filter_; - bool force_load_next_chunk = false; while (true) { - // When get_next_page() reports no more data for the current chunk but - // metadata still lists more chunks, we must load the next chunk. A - // bare continue would retry the exhausted reader forever if - // has_more_data() still returns true (e.g. aligned chunk state). - if (!chunk_reader_->has_more_data() || force_load_next_chunk) { - force_load_next_chunk = false; + if (!chunk_reader_->has_more_data()) { while (true) { if (!has_next_chunk()) { return E_NO_MORE_DATA; + } else if (is_multi_value_) { + // Multi-value aligned path + ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); + std::vector value_cms; + value_cms.reserve(value_chunk_meta_cursors_.size()); + for (auto& cur : value_chunk_meta_cursors_) { + value_cms.push_back(cur.get()); + } + advance_to_next_chunk(); + // Skip chunk by time filter using time chunk statistics. + if (filter != nullptr && time_cm->statistic_ != nullptr && + !filter->satisfy(time_cm->statistic_)) { + continue; + } + if (should_skip_chunk_by_time(time_cm, min_time_hint)) { + continue; + } + chunk_reader_->reset(); + auto* acr = static_cast(chunk_reader_); + if (RET_FAIL(acr->load_by_aligned_meta_multi(time_cm, + value_cms))) { + } + break; + } else if (!is_aligned_) { + ChunkMeta* cm = get_current_chunk_meta(); + advance_to_next_chunk(); + if (filter != nullptr && cm->statistic_ != nullptr && + !filter->satisfy(cm->statistic_)) { + continue; + } + // Skip by min_time_hint (merge cursor). 
+ if (should_skip_chunk_by_time(cm, min_time_hint)) { + continue; + } + // Single-path: skip entire chunk by offset using count. + if (should_skip_chunk_by_offset(cm)) { + continue; + } + chunk_reader_->reset(); + if (RET_FAIL(chunk_reader_->load_by_meta(cm))) { + } + break; } else { - if (!is_aligned_) { - ChunkMeta* cm = get_current_chunk_meta(); - advance_to_next_chunk(); - // Skip by time filter. - if (filter != nullptr && cm->statistic_ != nullptr && - !filter->satisfy(cm->statistic_)) { - continue; - } - // Skip by min_time_hint (merge cursor). - if (should_skip_chunk_by_time(cm, min_time_hint)) { - continue; - } - // Single-path: skip entire chunk by offset using count. - if (should_skip_chunk_by_offset(cm)) { - continue; - } - chunk_reader_->reset(); - if (RET_FAIL(chunk_reader_->load_by_meta(cm))) { - } - break; - } else { - ChunkMeta* value_cm = value_chunk_meta_cursor_.get(); - ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); - advance_to_next_chunk(); - if (filter != nullptr && - value_cm->statistic_ != nullptr && - !filter->satisfy(value_cm->statistic_)) { - continue; - } - if (should_skip_chunk_by_time(value_cm, - min_time_hint)) { - continue; - } - if (should_skip_aligned_chunk_by_offset(time_cm, - value_cm)) { - continue; - } - chunk_reader_->reset(); - if (RET_FAIL(chunk_reader_->load_by_aligned_meta( - time_cm, value_cm))) { - } - break; + ChunkMeta* value_cm = value_chunk_meta_cursor_.get(); + ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); + advance_to_next_chunk(); + // Use time chunk statistics for time-based filtering. + ChunkMeta* filter_cm = + (time_cm->statistic_ != nullptr) ? 
time_cm : value_cm; + if (filter != nullptr && filter_cm->statistic_ != nullptr && + !filter->satisfy(filter_cm->statistic_)) { + continue; } + if (should_skip_chunk_by_time(filter_cm, min_time_hint)) { + continue; + } + if (should_skip_chunk_by_offset(value_cm)) { + continue; + } + chunk_reader_->reset(); + if (RET_FAIL(chunk_reader_->load_by_aligned_meta( + time_cm, value_cm))) { + } + break; } } } if (IS_SUCC(ret)) { if (alloc && ret_tsblock == nullptr) { - ret_tsblock = alloc_tsblock(); + ret_tsblock = + is_multi_value_ ? alloc_tsblock_multi() : alloc_tsblock(); } ret = chunk_reader_->get_next_page(ret_tsblock, filter, *data_pa_, min_time_hint, row_offset_, row_limit_); } + if (ret == common::E_NO_MORE_DATA && ret_tsblock != nullptr && + ret_tsblock->get_row_count() > 0) { + return E_OK; + } // When current chunk is exhausted (e.g. all pages skipped by offset) // but there are more chunks, load next chunk and retry. if (ret == common::E_NO_MORE_DATA && has_next_chunk()) { ret = E_OK; - force_load_next_chunk = true; continue; } return ret; @@ -178,7 +185,16 @@ void TsFileSeriesScanIterator::revert_tsblock() { int TsFileSeriesScanIterator::init_chunk_reader() { int ret = E_OK; - is_aligned_ = itimeseries_index_->is_aligned(); + is_aligned_ = itimeseries_index_->get_data_type() == common::VECTOR; + + // Check if this is a multi-value aligned index. alloc_multi_ssi() creates + // MultiAlignedTimeseriesIndex even when the query selects one value column, + // so keep that path consistent with wider aligned reads. 
+ if (is_aligned_ && dynamic_cast( + itimeseries_index_) != nullptr) { + return init_chunk_reader_multi(); + } + if (!is_aligned_) { void* buf = common::mem_alloc(sizeof(ChunkReader), common::MOD_CHUNK_READER); @@ -205,6 +221,63 @@ int TsFileSeriesScanIterator::init_chunk_reader() { return ret; } +int TsFileSeriesScanIterator::init_chunk_reader_multi() { + int ret = E_OK; + is_multi_value_ = true; + + void* buf = + common::mem_alloc(sizeof(AlignedChunkReader), common::MOD_CHUNK_READER); + auto* acr = new (buf) AlignedChunkReader; + chunk_reader_ = acr; + + uint32_t num_cols = itimeseries_index_->get_value_column_count(); +#ifdef ENABLE_THREADS + // Create decode thread pool once at SSI level, shared across all chunks. + if (num_cols > 1 && common::g_config_value_.parallel_read_enabled_) { + int max_threads = common::g_config_value_.read_thread_count_; + int nthreads = std::min((int)num_cols, max_threads); + decode_pool_ = new common::ThreadPool(nthreads); + acr->set_decode_pool(decode_pool_); + } +#endif + + // Init time cursor + time_chunk_meta_cursor_ = + itimeseries_index_->get_time_chunk_meta_list()->begin(); + + // Init all value cursors + value_chunk_meta_cursors_.resize(num_cols); + for (uint32_t c = 0; c < num_cols; c++) { + value_chunk_meta_cursors_[c] = + itimeseries_index_->get_value_chunk_meta_list(c)->begin(); + } + + // Init chunk reader + if (RET_FAIL( + acr->init(read_file_, itimeseries_index_->get_measurement_name(), + itimeseries_index_->get_data_type(), time_filter_))) { + return ret; + } + + // Load first chunk set + ChunkMeta* time_cm = time_chunk_meta_cursor_.get(); + std::vector value_cms; + value_cms.reserve(num_cols); + for (uint32_t c = 0; c < num_cols; c++) { + value_cms.push_back(value_chunk_meta_cursors_[c].get()); + } + + if (RET_FAIL(acr->load_by_aligned_meta_multi(time_cm, value_cms))) { + return ret; + } + + // Advance cursors + time_chunk_meta_cursor_++; + for (auto& cur : value_chunk_meta_cursors_) cur++; + + return ret; +} + 
TsBlock* TsFileSeriesScanIterator::alloc_tsblock() { ChunkHeader& ch = chunk_reader_->get_chunk_header(); @@ -225,4 +298,29 @@ TsBlock* TsFileSeriesScanIterator::alloc_tsblock() { return tsblock_; } -} // end namespace storage \ No newline at end of file +TsBlock* TsFileSeriesScanIterator::alloc_tsblock_multi() { + auto* acr = static_cast(chunk_reader_); + + // Time column + ColumnSchema time_cd("time", common::INT64, common::SNAPPY, + common::TS_2DIFF); + tuple_desc_.push_back(time_cd); + + // Value columns + uint32_t num_cols = acr->get_value_column_count(); + for (uint32_t c = 0; c < num_cols; c++) { + ChunkHeader& ch = acr->get_value_chunk_header(c); + ColumnSchema value_cd(ch.measurement_name_, ch.data_type_, + ch.compression_type_, ch.encoding_type_); + tuple_desc_.push_back(value_cd); + } + + tsblock_ = new TsBlock(&tuple_desc_); + if (E_OK != tsblock_->init()) { + delete tsblock_; + tsblock_ = nullptr; + } + return tsblock_; +} + +} // end namespace storage diff --git a/cpp/src/reader/tsfile_series_scan_iterator.h b/cpp/src/reader/tsfile_series_scan_iterator.h index 9e790a3d1..9e03f8665 100644 --- a/cpp/src/reader/tsfile_series_scan_iterator.h +++ b/cpp/src/reader/tsfile_series_scan_iterator.h @@ -31,6 +31,12 @@ #include "reader/filter/filter.h" #include "utils/util_define.h" +#ifdef ENABLE_THREADS +namespace common { +class ThreadPool; +} +#endif + namespace storage { class TsFileIOReader; @@ -50,6 +56,7 @@ class TsFileSeriesScanIterator { tsblock_(nullptr), time_filter_(nullptr), is_aligned_(false), + is_multi_value_(false), row_offset_(0), row_limit_(-1) {} ~TsFileSeriesScanIterator() { destroy(); } @@ -93,11 +100,29 @@ class TsFileSeriesScanIterator { int64_t min_time_hint = std::numeric_limits::min()); void revert_tsblock(); + // Multi-value: number of value columns in the TsBlock + uint32_t get_value_column_count() const { + if (is_multi_value_ && chunk_reader_) { + auto* acr = static_cast(chunk_reader_); + return acr->get_value_column_count(); + } + 
return 1; + } + + bool is_multi_value() const { return is_multi_value_; } + friend class TsFileIOReader; private: int init_chunk_reader(); + int init_chunk_reader_multi(); FORCE_INLINE bool has_next_chunk() const { + if (is_multi_value_) { + // All value cursors advance in lockstep; check first one + return !value_chunk_meta_cursors_.empty() && + value_chunk_meta_cursors_[0] != + itimeseries_index_->get_value_chunk_meta_list(0)->end(); + } if (is_aligned_) { return value_chunk_meta_cursor_ != itimeseries_index_->get_value_chunk_meta_list()->end(); @@ -107,7 +132,10 @@ class TsFileSeriesScanIterator { } } FORCE_INLINE void advance_to_next_chunk() { - if (is_aligned_) { + if (is_multi_value_) { + time_chunk_meta_cursor_++; + for (auto& cur : value_chunk_meta_cursors_) cur++; + } else if (is_aligned_) { time_chunk_meta_cursor_++; value_chunk_meta_cursor_++; } else { @@ -119,15 +147,8 @@ class TsFileSeriesScanIterator { } bool should_skip_chunk_by_time(ChunkMeta* cm, int64_t min_time_hint); bool should_skip_chunk_by_offset(ChunkMeta* cm); - /** - * Aligned (VECTOR): whole-chunk skip by row count is only safe when the - * time ChunkMeta and value ChunkMeta agree on statistic count (>0). If - * either side lacks count or counts differ, skip is disabled for this - * chunk; pages are loaded and page/row-level offset handling applies. 
- */ - bool should_skip_aligned_chunk_by_offset(ChunkMeta* time_cm, - ChunkMeta* value_cm); common::TsBlock* alloc_tsblock(); + common::TsBlock* alloc_tsblock_multi(); private: ReadFile* read_file_; @@ -140,14 +161,22 @@ class TsFileSeriesScanIterator { common::SimpleList::Iterator chunk_meta_cursor_; common::SimpleList::Iterator time_chunk_meta_cursor_; common::SimpleList::Iterator value_chunk_meta_cursor_; + // Multi-value: one cursor per value column + std::vector::Iterator> + value_chunk_meta_cursors_; IChunkReader* chunk_reader_; common::TupleDesc tuple_desc_; common::TsBlock* tsblock_; Filter* time_filter_; bool is_aligned_ = false; + bool is_multi_value_ = false; int row_offset_; int row_limit_; +#ifdef ENABLE_THREADS + common::ThreadPool* decode_pool_ = + nullptr; // owned, for multi-value decode +#endif }; } // end namespace storage diff --git a/cpp/src/ts_2diff.h b/cpp/src/ts_2diff.h new file mode 100644 index 000000000..4300794aa --- /dev/null +++ b/cpp/src/ts_2diff.h @@ -0,0 +1,267 @@ +// +// Created by 李烁麟 on 25-8-3. +// + +#ifndef TS_2DIFF_H +#define TS_2DIFF_H + +#include +#include +#include + +#include "utils.h" + +template +struct always_false : std::false_type {}; + +template ::value || + std::is_same::value>::type> +class ts_2diff_encoder { + public: + ts_2diff_encoder() { + block_size_ = 128; + delta_array_ = new T[block_size_]; + write_index_ = -1; + bits_left_ = 8; + buffer_ = 0; + delta_array_min_ = 0; + delta_array_max_ = 0; + first_value_ = 0; + previous_value_ = 0; + cap_ = 1024; + cur_position_ = 0; + data_array_ = (uint8_t*)malloc(cap_); + } + + ~ts_2diff_encoder() { free(data_array_); } + + void do_encode(T value) { + // Record first value. 
+ if (write_index_ == -1) { + first_value_ = value; + previous_value_ = first_value_; + write_index_++; + return; + } + + T delta = value - previous_value_; + previous_value_ = value; + + if (write_index_ == 0) { + delta_array_min_ = delta; + delta_array_max_ = delta; + } + + delta_array_min_ = std::min(delta, delta_array_min_); + delta_array_max_ = std::max(delta, delta_array_max_); + delta_array_[write_index_++] = delta; + + if (write_index_ >= block_size_) { + flush_internal(); + } + } + + int32_t flush(uint8_t*& data) { + if (write_index_ != -1) { + flush_internal(); + } + data = data_array_; + return cur_position_; + } + + // Flush data into local data array. + void flush_internal() { + uint8_t* data = nullptr; + int data_size = write_to_binary(data); + if (data_size == 0) { + return; + } + if (cap_ - cur_position_ < data_size) { + cap_ *= 2; + data_array_ = (uint8_t*)realloc(data_array_, cap_); + } + memcpy(data_array_ + cur_position_, data, data_size); + free(data); + cur_position_ += data_size; + } + + int write_to_binary(uint8_t*& data) { + uint8_t* value = nullptr; + if (write_index_ == -1) { + return 0; + } + + for (int i = 0; i < write_index_; ++i) { + delta_array_[i] -= delta_array_min_; + } + + int bit_width = 0; + T delta_max = delta_array_max_ - delta_array_min_; + while (delta_max > 0) { + bit_width++; + delta_max >>= 1; + } + + int size = 4 * 4 + (bit_width * write_index_ + 7) / 8; + value = (uint8_t*)malloc(size); + data = value; + + write_uint32(write_index_, value); + write_uint32(bit_width, value); + if constexpr (std::is_same::value) { + write_uint32(static_cast(delta_array_min_), value); + write_uint32(static_cast(first_value_), value); + } else if constexpr (std::is_same::value) { + write_uint64(static_cast(delta_array_min_), value); + write_uint64(static_cast(first_value_), value); + } else { + static_assert(always_false::value, "Unsupport type T"); + } + + for (int i = 0; i < write_index_; i++) { + write_bits(delta_array_[i], bit_width, 
value); + } + + if (bits_left_ != 0 && bits_left_ != 8) { + bits_left_ = 0; + flush_byte_if_full(value); + } + write_index_ = -1; + return size; + } + + void write_bits(int64_t value, int bits, uint8_t*& out_stream) { + while (bits > 0) { + int shift = bits - bits_left_; + if (shift >= 0) { + buffer_ |= + (uint8_t)((value >> shift) & ((1 << bits_left_) - 1)); + bits -= bits_left_; + bits_left_ = 0; + } else { + shift = bits_left_ - bits; + buffer_ |= (uint8_t)(value << shift); + bits_left_ -= bits; + bits = 0; + } + flush_byte_if_full(out_stream); + } + } + + void flush_byte_if_full(uint8_t*& out_stream) { + if (bits_left_ == 0) { + memcpy(out_stream, &buffer_, 1); + out_stream++; + buffer_ = 0; + bits_left_ = 8; + } + } + + private: + int block_size_; + T* delta_array_; + T first_value_; + T previous_value_; + T delta_array_min_; + T delta_array_max_; + uint8_t buffer_; + uint8_t* data_array_ = nullptr; + int32_t cap_; + int32_t cur_position_; + int bits_left_; + int write_index_; +}; + +template ::value || + std::is_same::value>::type> +class ts_2diff_decoder { + public: + explicit ts_2diff_decoder(uint8_t* data_array) { + write_index_ = -1; + bits_left_ = 0; + stored_value_ = 0; + buffer_ = 0; + delta_min_ = 0; + previous_value_ = 0; + bit_width_ = 0; + current_index_ = 0; + data_array_ = data_array; + } + + T decode() { + T ret_value = 0; + if (current_index_ == 0) { + write_index_ = read_ui32(data_array_); + bit_width_ = read_ui32(data_array_); + if constexpr (std::is_same::value) { + delta_min_ = read_ui32(data_array_); + previous_value_ = read_ui32(data_array_); + } else if constexpr (std::is_same::value) { + delta_min_ = read_ui64(data_array_); + previous_value_ = read_ui64(data_array_); + } else { + static_assert(always_false::value, "Unsupport type T"); + } + ret_value = previous_value_; + if (write_index_ == 0) { + current_index_ = 0; + } else { + current_index_ = 1; + } + return ret_value; + } + if (current_index_++ >= write_index_) { + current_index_ = 
0; + } + stored_value_ = read_long(bit_width_, data_array_); + ret_value = stored_value_ + previous_value_ + delta_min_; + previous_value_ = ret_value; + return ret_value; + } + void read_byte_if_empty(uint8_t*& in) { + if (bits_left_ == 0) { + memcpy(&buffer_, in, 1); + bits_left_ = 8; + in += 1; + } + } + int64_t read_long(int bits, uint8_t*& in) { + int64_t value = 0; + while (bits > 0) { + read_byte_if_empty(in); + if (bits > bits_left_ || bits == 8) { + auto d = (uint8_t)(buffer_ & ((1 << bits_left_) - 1)); + value = (value << bits_left_) + (d & 0xFF); + bits -= bits_left_; + bits_left_ = 0; + } else { + auto d = (uint8_t)((((uint8_t)buffer_) >> (bits_left_ - bits)) & + ((1 << bits) - 1)); + value = (value << bits) + (d & 0xFF); + bits_left_ -= bits; + bits = 0; + } + if (bits <= 0 && current_index_ == 0) { + break; + } + } + return value; + } + + private: + T stored_value_; + T delta_min_; + T previous_value_; + uint8_t buffer_; + int bits_left_; + int bit_width_; + int write_index_; + int current_index_; + uint8_t* data_array_; +}; + +#endif // TS_2DIFF_H diff --git a/cpp/src/utils.h b/cpp/src/utils.h new file mode 100644 index 000000000..01e74d746 --- /dev/null +++ b/cpp/src/utils.h @@ -0,0 +1,55 @@ +// +// Created by 李烁麟 on 25-8-4. 
+// + +#ifndef UTILS_H +#define UTILS_H +#include + +static uint32_t read_ui32(uint8_t*& data) { + uint8_t buf[4]; + memcpy(buf, data, 4); + data += 4; + uint32_t ui32 = buf[0]; + ui32 = (ui32 << 8) | (buf[1] & 0xFF); + ui32 = (ui32 << 8) | (buf[2] & 0xFF); + ui32 = (ui32 << 8) | (buf[3] & 0xFF); + return ui32; +} + +static uint64_t read_ui64(uint8_t*& data) { + uint8_t buf[8]; + memcpy(buf, data, 8); + data += 8; + uint64_t ui64 = buf[0]; + for (int i = 1; i < 8; ++i) { + ui64 = (ui64 << 8) | (buf[i] & 0xFF); + } + return ui64; +} + +static void write_uint32(const uint32_t ui32, uint8_t*& value) { + uint8_t buf[4]; + buf[0] = static_cast((ui32 >> 24) & 0xFF); + buf[1] = static_cast((ui32 >> 16) & 0xFF); + buf[2] = static_cast((ui32 >> 8) & 0xFF); + buf[3] = static_cast((ui32) & 0xFF); + memcpy(value, buf, 4); + value += 4; +} + +static void write_uint64(const uint64_t ui64, uint8_t*& value) { + uint8_t buf[8]; + buf[0] = static_cast((ui64 >> 56) & 0xFF); + buf[1] = static_cast((ui64 >> 48) & 0xFF); + buf[2] = static_cast((ui64 >> 40) & 0xFF); + buf[3] = static_cast((ui64 >> 32) & 0xFF); + buf[4] = static_cast((ui64 >> 24) & 0xFF); + buf[5] = static_cast((ui64 >> 16) & 0xFF); + buf[6] = static_cast((ui64 >> 8) & 0xFF); + buf[7] = static_cast((ui64) & 0xFF); + memcpy(value, buf, 8); + value += 8; +} + +#endif // UTILS_H diff --git a/cpp/src/utils/db_utils.h b/cpp/src/utils/db_utils.h index 4ffc4d138..832e5c167 100644 --- a/cpp/src/utils/db_utils.h +++ b/cpp/src/utils/db_utils.h @@ -23,8 +23,8 @@ #include #include #include // memcpy +#include -#include #include #include #include @@ -195,11 +195,12 @@ struct ColumnSchema { }; FORCE_INLINE int64_t get_cur_timestamp() { - // Milliseconds since the Unix epoch. Uses the C++11 standard library so it - // is portable across platforms (gettimeofday is not available on MSVC). 
- return std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + int64_t timestamp = 0; + struct timeval tv; + if (gettimeofday(&tv, NULL) >= 0) { + timestamp = (int64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000; + } + return timestamp; } } // end namespace common diff --git a/cpp/src/utils/storage_utils.h b/cpp/src/utils/storage_utils.h index 3cb06762a..78cc775c1 100644 --- a/cpp/src/utils/storage_utils.h +++ b/cpp/src/utils/storage_utils.h @@ -23,7 +23,6 @@ #include #include -#include #include "common/datatype/value.h" #include "common/tsblock/tsblock.h" diff --git a/cpp/src/utils/util_define.h b/cpp/src/utils/util_define.h index 3cbe1d4c9..2796dfb0f 100644 --- a/cpp/src/utils/util_define.h +++ b/cpp/src/utils/util_define.h @@ -23,67 +23,6 @@ #include #include -/* ======== platform compatibility ======== - * - * MSVC does not provide several POSIX types/functions/macros used across the - * codebase. Provide drop-in equivalents so the same source compiles on both - * GCC/Clang (Linux) and MSVC (Windows) without scattering #ifdefs. - */ -#ifdef _WIN32 -#include -#include - -#if defined(_MSC_VER) -// ssize_t is a signed, pointer-sized integer; intptr_t (from , -// included above) is exactly that. We deliberately avoid /SSIZE_T -// because that header also pollutes the global namespace with INT32/INT64 -// typedefs, which collide with the project's own INT32/INT64 enum values. -typedef intptr_t ssize_t; -typedef int mode_t; -#endif // _MSC_VER - -// access() mode flags (POSIX ); MSVC's _access uses the same bits. 
-#ifndef F_OK -#define F_OK 0 -#endif -#ifndef X_OK -#define X_OK 1 -#endif -#ifndef W_OK -#define W_OK 2 -#endif -#ifndef R_OK -#define R_OK 4 -#endif - -#ifndef strcasecmp -#define strcasecmp _stricmp -#endif -#ifndef strncasecmp -#define strncasecmp _strnicmp -#endif -#endif // _WIN32 - -/* ======== shared-library symbol visibility ======== - * - * Functions are exported from tsfile.dll automatically via - * CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS, but global DATA symbols (plain variables, - * static class members) are not reliably auto-exported, and a consumer must - * see __declspec(dllimport) to reference them across the DLL boundary. Mark - * such symbols with TSFILE_API: it expands to dllexport while building the - * library (TSFILE_BUILDING is defined for its own translation units), - * dllimport for external consumers, and nothing on non-MSVC toolchains. - */ -#if defined(_MSC_VER) -#if defined(TSFILE_BUILDING) -#define TSFILE_API __declspec(dllexport) -#else -#define TSFILE_API __declspec(dllimport) -#endif -#else -#define TSFILE_API -#endif - /* ======== unsued ======== */ #define UNUSED(v) ((void)(v)) #if __cplusplus >= 201703L @@ -95,10 +34,8 @@ typedef int mode_t; #endif /* ======== inline ======== */ -#if defined(__GNUC__) || defined(__clang__) +#ifdef __GNUC__ #define FORCE_INLINE inline __attribute__((always_inline)) -#elif defined(_MSC_VER) -#define FORCE_INLINE __forceinline #else #define FORCE_INLINE inline #endif // __GNUC__ @@ -154,19 +91,7 @@ typedef int mode_t; #define STATIC_ASSERT(cond, msg) static_assert((cond), #msg) #endif // __cplusplus < 201103L -/* ======== atomic operation ======== - * - * The ATOMIC_* macros operate on the address of a plain (non-std::atomic) - * scalar, matching the semantics of the GCC/Clang __atomic builtins. - * - * - On GCC/Clang the builtins are used directly (unchanged behaviour). - * - On other compilers (MSVC) they are implemented on top of C++11 - * via helper templates. 
Reinterpreting a plain scalar's address as a - * std::atomic* is well-defined in practice for lock-free integral types - * (this is exactly what C++20 std::atomic_ref formalizes); all current call - * sites use naturally-aligned integral members. - */ -#if defined(__GNUC__) || defined(__clang__) +/* ======== atomic operation ======== */ #define ATOMIC_FAA(val_addr, addv) \ __atomic_fetch_add((val_addr), (addv), __ATOMIC_SEQ_CST) #define ATOMIC_AAF(val_addr, addv) \ @@ -187,67 +112,9 @@ typedef int mode_t; #define ATOMIC_LOAD(val_addr) __atomic_load_n((val_addr), __ATOMIC_SEQ_CST) #define ATOMIC_STORE(val_addr, val) \ __atomic_store_n((val_addr), (val), __ATOMIC_SEQ_CST) -#elif defined(__cplusplus) -#include -namespace common { -namespace util_atomic { -template -inline std::atomic* as_atomic(T* p) { - return reinterpret_cast*>(p); -} -template -inline const std::atomic* as_atomic(const T* p) { - return reinterpret_cast*>(p); -} -// fetch-and-add: returns the value held *before* the addition. -template -inline T faa(T* p, V v) { - return as_atomic(p)->fetch_add(static_cast(v), - std::memory_order_seq_cst); -} -// add-and-fetch: returns the value held *after* the addition. -template -inline T aaf(T* p, V v) { - return static_cast( - as_atomic(p)->fetch_add(static_cast(v), std::memory_order_seq_cst) + - static_cast(v)); -} -// compare-and-swap: returns true on success; on failure writes the current -// value into *expected (same contract as __atomic_compare_exchange_n). 
-template -inline bool cas(T* p, T* expected, D desired) { - return as_atomic(p)->compare_exchange_strong( - *expected, static_cast(desired), std::memory_order_seq_cst); -} -template -inline T load(const T* p) { - return as_atomic(p)->load(std::memory_order_seq_cst); -} -template -inline void store(T* p, V v) { - as_atomic(p)->store(static_cast(v), std::memory_order_seq_cst); -} -} // namespace util_atomic -} // namespace common -#define ATOMIC_FAA(val_addr, addv) \ - (::common::util_atomic::faa((val_addr), (addv))) -#define ATOMIC_AAF(val_addr, addv) \ - (::common::util_atomic::aaf((val_addr), (addv))) -#define ATOMIC_CAS(val_addr, expected, desired) \ - (::common::util_atomic::cas((val_addr), (expected), (desired))) -#define ATOMIC_LOAD(val_addr) (::common::util_atomic::load((val_addr))) -#define ATOMIC_STORE(val_addr, val) \ - (::common::util_atomic::store((val_addr), (val))) -#endif // atomic operation /* ======== align ======== */ -#if defined(__GNUC__) || defined(__clang__) #define ALIGNED(a) __attribute__((aligned(a))) -#elif defined(_MSC_VER) -#define ALIGNED(a) __declspec(align(a)) -#else -#define ALIGNED(a) -#endif #define ALIGNED_4 ALIGNED(4) #define ALIGNED_8 ALIGNED(8) diff --git a/cpp/src/writer/chunk_writer.cc b/cpp/src/writer/chunk_writer.cc index da1811336..acdb4951d 100644 --- a/cpp/src/writer/chunk_writer.cc +++ b/cpp/src/writer/chunk_writer.cc @@ -138,6 +138,9 @@ int ChunkWriter::seal_cur_page(bool end_chunk) { void ChunkWriter::save_first_page_data(PageWriter& first_page_writer) { first_page_data_ = first_page_writer.get_cur_page_data(); first_page_statistic_->deep_copy_from(first_page_writer.get_statistic()); + // See ValueChunkWriter::save_first_page_data: avoid double-free on the + // shallow-copied buffer pointers. 
+ first_page_writer.release_cur_page_data(); } int ChunkWriter::write_first_page_data(ByteStream& pages_data, diff --git a/cpp/src/writer/chunk_writer.h b/cpp/src/writer/chunk_writer.h index 6eb3f5418..7530b6d48 100644 --- a/cpp/src/writer/chunk_writer.h +++ b/cpp/src/writer/chunk_writer.h @@ -103,6 +103,65 @@ class ChunkWriter { CW_DO_WRITE_FOR_TYPE(); } + template + int write_batch(const int64_t* timestamps, const T* values, + uint32_t count) { + int ret = common::E_OK; + uint32_t offset = 0; + while (offset < count) { + uint32_t cur_points = page_writer_.get_point_numer(); + uint32_t page_remaining = + common::g_config_value_.page_writer_max_point_num_ - cur_points; + if (page_remaining == 0) { + if (RET_FAIL(seal_cur_page(false))) { + return ret; + } + page_remaining = + common::g_config_value_.page_writer_max_point_num_; + } + uint32_t batch_size = std::min(count - offset, page_remaining); + if (RET_FAIL(page_writer_.write_batch( + timestamps + offset, values + offset, batch_size))) { + return ret; + } + offset += batch_size; + if (RET_FAIL(seal_cur_page_if_full())) { + return ret; + } + } + return ret; + } + + int write_string_batch(const int64_t* timestamps, const char* buffer, + const uint32_t* offsets, uint32_t start_idx, + uint32_t count) { + int ret = common::E_OK; + uint32_t offset = 0; + while (offset < count) { + uint32_t cur_points = page_writer_.get_point_numer(); + uint32_t page_remaining = + common::g_config_value_.page_writer_max_point_num_ - cur_points; + if (page_remaining == 0) { + if (RET_FAIL(seal_cur_page(false))) { + return ret; + } + page_remaining = + common::g_config_value_.page_writer_max_point_num_; + } + uint32_t batch_size = std::min(count - offset, page_remaining); + if (RET_FAIL(page_writer_.write_string_batch( + timestamps + offset, buffer, offsets, start_idx + offset, + batch_size))) { + return ret; + } + offset += batch_size; + if (RET_FAIL(seal_cur_page_if_full())) { + return ret; + } + } + return ret; + } + int 
end_encode_chunk(); common::ByteStream& get_chunk_data() { return chunk_data_; } Statistic* get_chunk_statistic() { return chunk_statistic_; } diff --git a/cpp/src/writer/page_writer.h b/cpp/src/writer/page_writer.h index d3966d865..0c25c3293 100644 --- a/cpp/src/writer/page_writer.h +++ b/cpp/src/writer/page_writer.h @@ -150,6 +150,43 @@ class PageWriter { PW_DO_WRITE_FOR_TYPE(); } + template + FORCE_INLINE int write_batch(const int64_t* timestamps, const T* values, + uint32_t count) { + int ret = common::E_OK; + if (count == 0) return ret; + if (RET_FAIL(time_encoder_->encode_batch(timestamps, count, + time_out_stream_))) { + } else if (RET_FAIL(value_encoder_->encode_batch(values, count, + value_out_stream_))) { + } else { + statistic_->update_batch(timestamps, values, count); + } + return ret; + } + + // Batch write strings from Arrow-style offset+buffer layout. + FORCE_INLINE int write_string_batch(const int64_t* timestamps, + const char* buffer, + const uint32_t* offsets, + uint32_t start_idx, uint32_t count) { + int ret = common::E_OK; + if (count == 0) return ret; + if (RET_FAIL(time_encoder_->encode_batch(timestamps, count, + time_out_stream_))) { + } else if (RET_FAIL(value_encoder_->encode_string_batch( + buffer, offsets, start_idx, count, value_out_stream_))) { + } else { + for (uint32_t i = 0; i < count; i++) { + uint32_t idx = start_idx + i; + uint32_t len = offsets[idx + 1] - offsets[idx]; + common::String val(buffer + offsets[idx], len); + statistic_->update(timestamps[i], val); + } + } + return ret; + } + FORCE_INLINE uint32_t get_point_numer() const { return statistic_->count_; } FORCE_INLINE uint32_t get_time_out_stream_size() const { return time_out_stream_.total_size(); @@ -179,6 +216,11 @@ class PageWriter { } FORCE_INLINE Statistic* get_statistic() { return statistic_; } PageData get_cur_page_data() { return cur_page_data_; } + // See ValuePageWriter::release_cur_page_data for rationale. 
+ void release_cur_page_data() { + cur_page_data_.uncompressed_buf_ = nullptr; + cur_page_data_.compressed_buf_ = nullptr; + } void destroy_page_data() { cur_page_data_.destroy(); } private: @@ -194,7 +236,7 @@ class PageWriter { private: // static const uint32_t OUT_STREAM_PAGE_SIZE = 48; - static const uint32_t OUT_STREAM_PAGE_SIZE = 1024; + static const uint32_t OUT_STREAM_PAGE_SIZE = 65536; private: common::TSDataType data_type_; diff --git a/cpp/src/writer/time_chunk_writer.cc b/cpp/src/writer/time_chunk_writer.cc index 0c7e3b212..0a0623686 100644 --- a/cpp/src/writer/time_chunk_writer.cc +++ b/cpp/src/writer/time_chunk_writer.cc @@ -144,6 +144,9 @@ int TimeChunkWriter::seal_cur_page(bool end_chunk) { void TimeChunkWriter::save_first_page_data(TimePageWriter& first_page_writer) { first_page_data_ = first_page_writer.get_cur_page_data(); first_page_statistic_->deep_copy_from(first_page_writer.get_statistic()); + // See ValueChunkWriter::save_first_page_data: avoid double-free on the + // shallow-copied buffer pointers. 
+ first_page_writer.release_cur_page_data(); } int TimeChunkWriter::write_first_page_data(ByteStream& pages_data, @@ -173,9 +176,6 @@ int TimeChunkWriter::end_encode_chunk() { chunk_header_.data_size_ = chunk_data_.total_size(); chunk_header_.num_of_pages_ = num_of_pages_; } - } else if (num_of_pages_ > 0) { - chunk_header_.data_size_ = chunk_data_.total_size(); - chunk_header_.num_of_pages_ = num_of_pages_; } #if DEBUG_SE std::cout << "end_encode_time_chunk: num_of_pages_=" << num_of_pages_ diff --git a/cpp/src/writer/time_chunk_writer.h b/cpp/src/writer/time_chunk_writer.h index c67516ba5..fc9cf9ebf 100644 --- a/cpp/src/writer/time_chunk_writer.h +++ b/cpp/src/writer/time_chunk_writer.h @@ -42,8 +42,7 @@ class TimeChunkWriter { first_page_data_(), first_page_statistic_(nullptr), chunk_header_(), - num_of_pages_(0), - enable_page_seal_if_full_(true) {} + num_of_pages_(0) {} ~TimeChunkWriter() { destroy(); } int init(const common::ColumnSchema& col_schema); int init(const std::string& measurement_name, common::TSEncoding encoding, @@ -58,9 +57,32 @@ class TimeChunkWriter { if (RET_FAIL(time_page_writer_.write(timestamp))) { return ret; } - if (UNLIKELY(!enable_page_seal_if_full_)) { + if (RET_FAIL(seal_cur_page_if_full())) { return ret; - } else { + } + return ret; + } + + int write_batch(const int64_t* timestamps, uint32_t count) { + int ret = common::E_OK; + uint32_t offset = 0; + while (offset < count) { + uint32_t cur_points = time_page_writer_.get_point_numer(); + uint32_t page_remaining = + common::g_config_value_.page_writer_max_point_num_ - cur_points; + if (page_remaining == 0) { + if (RET_FAIL(seal_cur_page(false))) { + return ret; + } + page_remaining = + common::g_config_value_.page_writer_max_point_num_; + } + uint32_t batch_size = std::min(count - offset, page_remaining); + if (RET_FAIL(time_page_writer_.write_batch(timestamps + offset, + batch_size))) { + return ret; + } + offset += batch_size; if (RET_FAIL(seal_cur_page_if_full())) { return ret; } 
@@ -73,29 +95,25 @@ class TimeChunkWriter { Statistic* get_chunk_statistic() { return chunk_statistic_; } FORCE_INLINE int32_t num_of_pages() const { return num_of_pages_; } + int64_t estimate_max_series_mem_size(); + + bool hasData(); + // Current (unsealed) page point count. FORCE_INLINE uint32_t get_point_numer() const { return time_page_writer_.get_point_numer(); } - int64_t estimate_max_series_mem_size(); - - bool hasData(); - /** True if the current (unsealed) page has at least one point. */ bool has_current_page_data() const { return time_page_writer_.get_point_numer() > 0; } - /** - * Force seal the current page (for aligned model: when any aligned page - * seals due to memory/point threshold, all pages must seal together). - * @return E_OK on success. - */ + /** Force seal the current page. */ int seal_current_page() { return seal_cur_page(false); } - // For aligned writer: allow disabling the automatic page-size/point-number - // check so the caller can seal pages at chosen boundaries. + // Allow disabling the automatic page-size/point-number check so the + // caller can seal pages at chosen boundaries. FORCE_INLINE void set_enable_page_seal_if_full(bool enable) { enable_page_seal_if_full_ = enable; } @@ -109,6 +127,9 @@ class TimeChunkWriter { common::g_config_value_.page_writer_max_memory_bytes_); } FORCE_INLINE int seal_cur_page_if_full() { + if (UNLIKELY(!enable_page_seal_if_full_)) { + return common::E_OK; + } if (UNLIKELY(is_cur_page_full())) { return seal_cur_page(false); } @@ -138,8 +159,7 @@ class TimeChunkWriter { ChunkHeader chunk_header_; int32_t num_of_pages_; - // If false, write() won't auto-seal when the current page becomes full. 
- bool enable_page_seal_if_full_; + bool enable_page_seal_if_full_ = true; }; } // end namespace storage diff --git a/cpp/src/writer/time_page_writer.h b/cpp/src/writer/time_page_writer.h index d9dcecff1..a9858260f 100644 --- a/cpp/src/writer/time_page_writer.h +++ b/cpp/src/writer/time_page_writer.h @@ -84,6 +84,28 @@ class TimePageWriter { return ret; } + int write_batch(const int64_t* timestamps, uint32_t count) { + int ret = common::E_OK; + if (count == 0) return ret; + // Check order: first timestamp vs existing end_time + if (statistic_->count_ != 0 && is_inited_ && + timestamps[0] <= statistic_->end_time_) { + return common::E_OUT_OF_ORDER; + } + // Check monotonicity within batch + for (uint32_t i = 1; i < count; i++) { + if (timestamps[i] <= timestamps[i - 1]) { + return common::E_OUT_OF_ORDER; + } + } + if (RET_FAIL(time_encoder_->encode_batch(timestamps, count, + time_out_stream_))) { + } else { + statistic_->update_time_batch(timestamps, count); + } + return ret; + } + FORCE_INLINE uint32_t get_point_numer() const { return statistic_->count_; } FORCE_INLINE uint32_t get_time_out_stream_size() const { return time_out_stream_.total_size(); @@ -102,6 +124,11 @@ class TimePageWriter { } FORCE_INLINE Statistic* get_statistic() { return statistic_; } TimePageData get_cur_page_data() { return cur_page_data_; } + // See ValuePageWriter::release_cur_page_data for rationale. 
+ void release_cur_page_data() { + cur_page_data_.uncompressed_buf_ = nullptr; + cur_page_data_.compressed_buf_ = nullptr; + } void destroy_page_data() { cur_page_data_.destroy(); } private: @@ -115,7 +142,7 @@ class TimePageWriter { common::ByteStream& pages_data); private: - static const uint32_t OUT_STREAM_PAGE_SIZE = 1024; + static const uint32_t OUT_STREAM_PAGE_SIZE = 65536; private: common::TSDataType data_type_; diff --git a/cpp/src/writer/tsfile_table_writer.cc b/cpp/src/writer/tsfile_table_writer.cc index eb0319af8..c7a74a8f7 100644 --- a/cpp/src/writer/tsfile_table_writer.cc +++ b/cpp/src/writer/tsfile_table_writer.cc @@ -45,7 +45,7 @@ TsFileTableWriter::TsFileTableWriter( } // namespace storage -storage::TsFileTableWriter::~TsFileTableWriter() = default; +storage::TsFileTableWriter::~TsFileTableWriter() { close(); } int storage::TsFileTableWriter::register_table( const std::shared_ptr& table_schema) { @@ -66,21 +66,38 @@ int storage::TsFileTableWriter::write_table(storage::Tablet& tablet) const { tablet.get_table_name() != exclusive_table_name_) { return common::E_TABLE_NOT_EXIST; } - tablet.set_table_name(to_lower(tablet.get_table_name())); - for (size_t i = 0; i < tablet.get_column_count(); i++) { - tablet.set_column_name(i, to_lower(tablet.get_column_name(i))); - } + if (!names_lowered_) { + tablet.set_table_name(to_lower(tablet.get_table_name())); + for (size_t i = 0; i < tablet.get_column_count(); i++) { + tablet.set_column_name(i, to_lower(tablet.get_column_name(i))); + } - auto schema_map = tablet.get_schema_map(); - std::map schema_map_; - for (auto iter = schema_map.begin(); iter != schema_map.end(); iter++) { - schema_map_[to_lower(iter->first)] = iter->second; + auto schema_map = tablet.get_schema_map(); + std::map new_schema_map; + for (auto iter = schema_map.begin(); iter != schema_map.end(); iter++) { + new_schema_map[to_lower(iter->first)] = iter->second; + } + tablet.set_schema_map(new_schema_map); + names_lowered_ = true; } - 
tablet.set_schema_map(schema_map_); return tsfile_writer_->write_table(tablet); } -int storage::TsFileTableWriter::flush() { return tsfile_writer_->flush(); } +int storage::TsFileTableWriter::flush() { + if (closed_) { + return common::E_OK; + } + return tsfile_writer_->flush(); +} -int storage::TsFileTableWriter::close() { return tsfile_writer_->close(); } +int storage::TsFileTableWriter::close() { + if (closed_) { + return common::E_OK; + } + closed_ = true; + if (!tsfile_writer_) { + return common::E_OK; + } + return tsfile_writer_->close(); +} diff --git a/cpp/src/writer/tsfile_table_writer.h b/cpp/src/writer/tsfile_table_writer.h index ce18bc007..8f74a4cd0 100644 --- a/cpp/src/writer/tsfile_table_writer.h +++ b/cpp/src/writer/tsfile_table_writer.h @@ -124,6 +124,11 @@ class TsFileTableWriter { // Some errors may not be conveyed during the construction phase, so it's // necessary to maintain an internal error code. int error_number = common::E_OK; + + // Track whether tablet names have already been lowered to avoid + // redundant string allocations on every write_table call. 
+ mutable bool names_lowered_ = false; + bool closed_ = false; }; } // namespace storage diff --git a/cpp/src/writer/tsfile_writer.cc b/cpp/src/writer/tsfile_writer.cc index 3170a3160..aee227661 100644 --- a/cpp/src/writer/tsfile_writer.cc +++ b/cpp/src/writer/tsfile_writer.cc @@ -19,17 +19,13 @@ #include "tsfile_writer.h" -#ifdef _WIN32 -#include -#else #include -#endif + +#include +#include #include "chunk_writer.h" #include "common/config/config.h" -#ifdef ENABLE_THREADS -#include "common/thread_pool.h" -#endif #include "file/restorable_tsfile_io_writer.h" #include "file/tsfile_io_writer.h" #include "file/write_file.h" @@ -57,10 +53,6 @@ int libtsfile_init() { } void libtsfile_destroy() { -#ifdef ENABLE_THREADS - delete common::g_write_thread_pool_; - common::g_write_thread_pool_ = nullptr; -#endif ModStat::get_instance().destroy(); libtsfile::g_s_is_inited = false; } @@ -72,10 +64,6 @@ void set_max_degree_of_index_node(uint32_t max_degree_of_index_node) { config_set_max_degree_of_index_node(max_degree_of_index_node); } -void set_strict_page_size(bool strict_page_size) { - config_set_strict_page_size(strict_page_size); -} - TsFileWriter::TsFileWriter() : write_file_(nullptr), io_writer_(nullptr), @@ -85,8 +73,7 @@ TsFileWriter::TsFileWriter() record_count_for_next_mem_check_( g_config_value_.record_count_for_next_mem_check_), write_file_created_(false), - io_writer_owned_(true), - enforce_recovered_last_time_order_(false) {} + io_writer_owned_(true) {} TsFileWriter::~TsFileWriter() { destroy(); } @@ -132,7 +119,6 @@ int TsFileWriter::init(WriteFile* write_file) { write_file_ = write_file; write_file_created_ = false; io_writer_owned_ = true; - enforce_recovered_last_time_order_ = false; io_writer_ = new TsFileIOWriter(); io_writer_->init(write_file_); return E_OK; @@ -152,7 +138,6 @@ int TsFileWriter::init(RestorableTsFileIOWriter* rw) { write_file_ = rw->get_write_file(); write_file_created_ = false; io_writer_owned_ = false; - 
enforce_recovered_last_time_order_ = true; io_writer_ = rw; const std::vector& recovered = @@ -189,10 +174,6 @@ int TsFileWriter::init(RestorableTsFileIOWriter* rw) { if (cm == nullptr) { continue; } - if (cm->statistic_ != nullptr && cm->statistic_->count_ > 0) { - group->last_time_ = - std::max(group->last_time_, cm->statistic_->end_time_); - } std::string mname = cm->measurement_name_.to_std_string(); if (mname.empty()) { continue; @@ -683,6 +664,10 @@ int64_t TsFileWriter::calculate_mem_size_for_all_group() { return mem_total_size; } +int64_t TsFileWriter::calculate_meta_mem_size() const { + return io_writer_->get_meta_size(); +} + /** * check occupied memory size, if it exceeds the chunkGroupSize threshold, flush * them to given OutputStream. @@ -703,22 +688,13 @@ int TsFileWriter::check_memory_size_and_may_flush_chunks() { int TsFileWriter::write_record(const TsRecord& record) { int ret = E_OK; - auto device_id = std::make_shared(record.device_id_); - auto schema_it = schemas_.find(device_id); - if (schema_it == schemas_.end() || schema_it->second == nullptr) { - return E_DEVICE_NOT_EXIST; - } - MeasurementSchemaGroup* device_schema = schema_it->second; - if (enforce_recovered_last_time_order_ && - record.timestamp_ <= device_schema->last_time_) { - return E_OUT_OF_ORDER; - } // std::vector chunk_writers; SimpleVector chunk_writers; SimpleVector data_types; MeasurementNamesFromRecord mnames_getter(record); - if (RET_FAIL(do_check_schema(device_id, mnames_getter, chunk_writers, - data_types))) { + if (RET_FAIL(do_check_schema( + std::make_shared(record.device_id_), + mnames_getter, chunk_writers, data_types))) { return ret; } @@ -733,8 +709,6 @@ int TsFileWriter::write_record(const TsRecord& record) { record.points_[c]); } - device_schema->last_time_ = - std::max(device_schema->last_time_, record.timestamp_); record_count_since_last_flush_++; ret = check_memory_size_and_may_flush_chunks(); return ret; @@ -742,36 +716,19 @@ int TsFileWriter::write_record(const 
TsRecord& record) { int TsFileWriter::write_record_aligned(const TsRecord& record) { int ret = E_OK; - auto device_id = std::make_shared(record.device_id_); - auto schema_it = schemas_.find(device_id); - if (schema_it == schemas_.end() || schema_it->second == nullptr) { - return E_DEVICE_NOT_EXIST; - } - MeasurementSchemaGroup* device_schema = schema_it->second; - if (enforce_recovered_last_time_order_ && - record.timestamp_ <= device_schema->last_time_) { - return E_OUT_OF_ORDER; - } SimpleVector value_chunk_writers; SimpleVector data_types; TimeChunkWriter* time_chunk_writer; MeasurementNamesFromRecord mnames_getter(record); - if (RET_FAIL(do_check_schema_aligned(device_id, mnames_getter, - time_chunk_writer, value_chunk_writers, - data_types))) { + if (RET_FAIL(do_check_schema_aligned( + std::make_shared(record.device_id_), + mnames_getter, time_chunk_writer, value_chunk_writers, + data_types))) { return ret; } if (value_chunk_writers.size() != record.points_.size()) { return E_INVALID_ARG; } - int32_t time_pages_before = time_chunk_writer->num_of_pages(); - std::vector value_pages_before(value_chunk_writers.size(), 0); - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; - if (!IS_NULL(value_chunk_writer)) { - value_pages_before[c] = value_chunk_writer->num_of_pages(); - } - } time_chunk_writer->write(record.timestamp_); for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; @@ -781,13 +738,6 @@ int TsFileWriter::write_record_aligned(const TsRecord& record) { write_point_aligned(value_chunk_writer, record.timestamp_, data_types[c], record.points_[c]); } - if (RET_FAIL(maybe_seal_aligned_pages_together( - time_chunk_writer, value_chunk_writers, time_pages_before, - value_pages_before))) { - return ret; - } - device_schema->last_time_ = - std::max(device_schema->last_time_, record.timestamp_); return ret; } @@ -849,328 
+799,74 @@ int TsFileWriter::write_point_aligned(ValueChunkWriter* value_chunk_writer, } } -int TsFileWriter::maybe_seal_aligned_pages_together( - TimeChunkWriter* time_chunk_writer, - common::SimpleVector& value_chunk_writers, - int32_t time_pages_before, const std::vector& value_pages_before) { - bool should_seal_all = - time_chunk_writer->num_of_pages() > time_pages_before; - for (uint32_t c = 0; c < value_chunk_writers.size() && !should_seal_all; - c++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; - if (!IS_NULL(value_chunk_writer) && - value_chunk_writer->num_of_pages() > value_pages_before[c]) { - should_seal_all = true; - break; - } - } - if (!should_seal_all) { - return E_OK; - } - - int ret = E_OK; - if (time_chunk_writer->has_current_page_data() && - RET_FAIL(time_chunk_writer->seal_current_page())) { - return ret; - } - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; - if (!IS_NULL(value_chunk_writer) && - value_chunk_writer->has_current_page_data() && - RET_FAIL(value_chunk_writer->seal_current_page())) { - return ret; - } - } - return ret; -} - int TsFileWriter::write_tablet_aligned(const Tablet& tablet) { int ret = E_OK; - auto device_id = - std::make_shared(tablet.insert_target_name_); - auto schema_it = schemas_.find(device_id); - if (schema_it == schemas_.end() || schema_it->second == nullptr) { - return E_DEVICE_NOT_EXIST; - } - MeasurementSchemaGroup* device_schema = schema_it->second; - const uint32_t total_rows = tablet.get_cur_row_size(); - if (enforce_recovered_last_time_order_ && total_rows > 0 && - tablet.timestamps_[0] <= device_schema->last_time_) { - return E_OUT_OF_ORDER; - } SimpleVector value_chunk_writers; TimeChunkWriter* time_chunk_writer = nullptr; SimpleVector data_types; MeasurementNamesFromTablet mnames_getter(tablet); - if (RET_FAIL(do_check_schema_aligned(device_id, mnames_getter, - time_chunk_writer, value_chunk_writers, - 
data_types))) { + if (RET_FAIL(do_check_schema_aligned( + std::make_shared(tablet.insert_target_name_), + mnames_getter, time_chunk_writer, value_chunk_writers, + data_types))) { return ret; } - const bool strict_page_size = common::g_config_value_.strict_page_size_; - - // Decide whether we have string/blob/text columns. - bool has_varlen_column = false; - for (uint32_t i = 0; i < data_types.size(); i++) { - if (data_types[i] == common::STRING || data_types[i] == common::TEXT || - data_types[i] == common::BLOB) { - has_varlen_column = true; - break; - } - } - - // Keep writers' seal-check behavior consistent across calls. - time_chunk_writer->set_enable_page_seal_if_full(strict_page_size); - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - if (!IS_NULL(value_chunk_writers[c])) { - value_chunk_writers[c]->set_enable_page_seal_if_full( - strict_page_size); - } - } - - if (strict_page_size) { - // Strict mode: keep the original row-based insertion to ensure aligned - // pages seal together when either side becomes full. 
- for (uint32_t row = 0; row < total_rows; row++) { - int32_t time_pages_before = time_chunk_writer->num_of_pages(); - std::vector value_pages_before(value_chunk_writers.size(), - 0); - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; - if (!IS_NULL(value_chunk_writer)) { - value_pages_before[c] = value_chunk_writer->num_of_pages(); - } - } - - if (RET_FAIL(time_chunk_writer->write(tablet.timestamps_[row]))) { - return ret; - } - ASSERT(value_chunk_writers.size() == tablet.get_column_count()); - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; - if (IS_NULL(value_chunk_writer)) { - continue; - } - if (RET_FAIL(value_write_column(value_chunk_writer, tablet, c, - row, row + 1))) { - return ret; - } - } - if (RET_FAIL(maybe_seal_aligned_pages_together( - time_chunk_writer, value_chunk_writers, time_pages_before, - value_pages_before))) { - return ret; - } - } - if (total_rows > 0) { - device_schema->last_time_ = std::max( - device_schema->last_time_, tablet.timestamps_[total_rows - 1]); - } - return ret; - } - - // Non-strict mode: switch to column-based insertion. - if (!has_varlen_column) { - // Optimization: when there is no string/blob/text column, we only need - // to split by point-number so that each split will trigger a page - // seal (and avoid the per-row page-size check). - const uint32_t points_per_page = - common::g_config_value_.page_writer_max_point_num_; - - // Disable auto page sealing. We will seal pages at split boundaries. - time_chunk_writer->set_enable_page_seal_if_full(false); - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - if (!IS_NULL(value_chunk_writers[c])) { - value_chunk_writers[c]->set_enable_page_seal_if_full(false); - } - } - - // Determine how many points we need to fill the current unsealed time - // page (it may already contain data from previous tablets). 
- uint32_t time_cur_points = time_chunk_writer->get_point_numer(); - if (time_cur_points >= points_per_page && - time_chunk_writer->has_current_page_data()) { - // Close the already-full page together with all aligned value - // pages. - if (RET_FAIL(time_chunk_writer->seal_current_page())) { - return ret; - } - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; - if (!IS_NULL(value_chunk_writer) && - value_chunk_writer->has_current_page_data()) { - if (RET_FAIL(value_chunk_writer->seal_current_page())) { - return ret; - } - } - } - time_cur_points = 0; - } - const uint32_t first_seg_len = - (time_cur_points > 0 && time_cur_points < points_per_page) - ? (points_per_page - time_cur_points) - : points_per_page; - - // 1) Write time in segments and seal all full segments (except the - // last remaining segment). - uint32_t seg_start = 0; - uint32_t seg_len = first_seg_len; - while (seg_start < total_rows) { - const uint32_t seg_end = std::min(seg_start + seg_len, total_rows); - if (RET_FAIL(time_write_column(time_chunk_writer, tablet, seg_start, - seg_end))) { - return ret; - } - seg_start = seg_end; - if (seg_start < total_rows) { - if (RET_FAIL(time_chunk_writer->seal_current_page())) { - return ret; - } - } - seg_len = points_per_page; - } - - // 2) Write each value column in the same segments. 
- ASSERT(value_chunk_writers.size() == tablet.get_column_count()); - for (uint32_t col = 0; col < value_chunk_writers.size(); col++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[col]; - if (IS_NULL(value_chunk_writer)) { - continue; - } - - seg_start = 0; - seg_len = first_seg_len; - while (seg_start < total_rows) { - const uint32_t seg_end = - std::min(seg_start + seg_len, total_rows); - if (RET_FAIL(value_write_column(value_chunk_writer, tablet, col, - seg_start, seg_end))) { - return ret; - } - seg_start = seg_end; - if (seg_start < total_rows) { - if (value_chunk_writer->has_current_page_data() && - RET_FAIL(value_chunk_writer->seal_current_page())) { - return ret; - } - } - seg_len = points_per_page; - } - } - if (total_rows > 0) { - device_schema->last_time_ = std::max( - device_schema->last_time_, tablet.timestamps_[total_rows - 1]); - } - return ret; - } - - // General non-strict (may have varlen STRING/TEXT/BLOB columns): - // time auto-seals to provide aligned page boundaries; value writers - // skip auto page sealing and are sealed manually at time boundaries. - // Attention: since value-side auto-seal is disabled, if a varlen value - // page hits the memory threshold earlier, it may not seal immediately - // and instead will be sealed later at the recorded time-page boundaries - // (this may sacrifice the strict page size limit for performance). - time_chunk_writer->set_enable_page_seal_if_full(true); - for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { - if (!IS_NULL(value_chunk_writers[c])) { - value_chunk_writers[c]->set_enable_page_seal_if_full(false); - } - } - - std::vector time_page_row_ends; - const uint32_t page_max_points = std::max( - 1, common::g_config_value_.page_writer_max_point_num_); - time_page_row_ends.reserve(total_rows / page_max_points + 1); - - // Write time and record where a time page is sealed. 
- for (uint32_t row = 0; row < total_rows; row++) { - const int32_t pages_before = time_chunk_writer->num_of_pages(); - if (RET_FAIL(time_chunk_writer->write(tablet.timestamps_[row]))) { - return ret; + ASSERT(data_types.size() == tablet.get_column_count()); + for (uint32_t c = 0; c < data_types.size(); c++) { + if (data_types[c] == common::NULL_TYPE) { + continue; } - const int32_t pages_after = time_chunk_writer->num_of_pages(); - if (pages_after > pages_before) { - const uint32_t boundary_end = row + 1; - if (time_page_row_ends.empty() || - time_page_row_ends.back() != boundary_end) { - time_page_row_ends.push_back(boundary_end); - } + if (data_types[c] != tablet.schema_vec_->at(c).data_type_) { + return E_TYPE_NOT_MATCH; } } - - // Write values column-by-column and seal at recorded boundaries. + time_write_column_batch(time_chunk_writer, tablet, 0, + tablet.get_cur_row_size()); ASSERT(value_chunk_writers.size() == tablet.get_column_count()); - for (uint32_t col = 0; col < value_chunk_writers.size(); col++) { - ValueChunkWriter* value_chunk_writer = value_chunk_writers[col]; + for (uint32_t c = 0; c < value_chunk_writers.size(); c++) { + ValueChunkWriter* value_chunk_writer = value_chunk_writers[c]; if (IS_NULL(value_chunk_writer)) { continue; } - uint32_t seg_start = 0; - for (uint32_t boundary_end : time_page_row_ends) { - if (boundary_end <= seg_start) { - continue; - } - if (RET_FAIL(value_write_column(value_chunk_writer, tablet, col, - seg_start, boundary_end))) { - return ret; - } - if (value_chunk_writer->has_current_page_data() && - RET_FAIL(value_chunk_writer->seal_current_page())) { - return ret; - } - seg_start = boundary_end; - } - if (seg_start < total_rows) { - if (RET_FAIL(value_write_column(value_chunk_writer, tablet, col, - seg_start, total_rows))) { - return ret; - } + if (RET_FAIL(value_write_column_batch(value_chunk_writer, tablet, c, 0, + tablet.get_cur_row_size()))) { + return ret; } } - if (total_rows > 0) { - device_schema->last_time_ = 
std::max( - device_schema->last_time_, tablet.timestamps_[total_rows - 1]); - } return ret; } int TsFileWriter::write_tablet(const Tablet& tablet) { int ret = E_OK; - auto device_id = - std::make_shared(tablet.insert_target_name_); - auto schema_it = schemas_.find(device_id); - if (schema_it == schemas_.end() || schema_it->second == nullptr) { - return E_DEVICE_NOT_EXIST; - } - MeasurementSchemaGroup* device_schema = schema_it->second; - const uint32_t total_rows = tablet.get_cur_row_size(); - if (enforce_recovered_last_time_order_ && total_rows > 0 && - tablet.timestamps_[0] <= device_schema->last_time_) { - return E_OUT_OF_ORDER; - } SimpleVector chunk_writers; SimpleVector data_types; MeasurementNamesFromTablet mnames_getter(tablet); - if (RET_FAIL(do_check_schema(device_id, mnames_getter, chunk_writers, - data_types))) { + if (RET_FAIL(do_check_schema( + std::make_shared(tablet.insert_target_name_), + mnames_getter, chunk_writers, data_types))) { return ret; } + ASSERT(data_types.size() == tablet.get_column_count()); + for (uint32_t c = 0; c < data_types.size(); c++) { + if (data_types[c] == common::NULL_TYPE) { + continue; + } + if (data_types[c] != tablet.schema_vec_->at(c).data_type_) { + return E_TYPE_NOT_MATCH; + } + } ASSERT(chunk_writers.size() == tablet.get_column_count()); for (uint32_t c = 0; c < chunk_writers.size(); c++) { ChunkWriter* chunk_writer = chunk_writers[c]; if (IS_NULL(chunk_writer)) { continue; } - if (RET_FAIL(write_column(chunk_writer, tablet, c))) { + if (RET_FAIL(write_column_batch(chunk_writer, tablet, c, 0, + tablet.max_row_num_))) { return ret; } } - if (total_rows > 0) { - device_schema->last_time_ = std::max( - device_schema->last_time_, tablet.timestamps_[total_rows - 1]); - } record_count_since_last_flush_ += tablet.max_row_num_; ret = check_memory_size_and_may_flush_chunks(); return ret; @@ -1214,120 +910,140 @@ int TsFileWriter::write_table(Tablet& tablet) { } auto device_id_end_index_pairs = split_tablet_by_device(tablet); 
- int start_idx = 0; - for (auto& device_id_end_index_pair : device_id_end_index_pairs) { - auto device_id = device_id_end_index_pair.first; - int end_idx = device_id_end_index_pair.second; - if (end_idx == 0) continue; - - SimpleVector value_chunk_writers; - TimeChunkWriter* time_chunk_writer = nullptr; - if (RET_FAIL(do_check_schema_table(device_id, tablet, time_chunk_writer, - value_chunk_writers))) { - return ret; - } - auto schema_it = schemas_.find(device_id); - MeasurementSchemaGroup* device_schema = - (schema_it == schemas_.end()) ? nullptr : schema_it->second; - - std::vector field_columns; - field_columns.reserve(tablet.get_column_count()); - for (uint32_t col = 0; col < tablet.get_column_count(); ++col) { - if (tablet.column_categories_[col] == - common::ColumnCategory::FIELD) { - field_columns.push_back(col); + + if (table_aligned_) { + // Per-device write context persisted across Phase 1 → Phase 2 so + // that lambdas can safely capture references. + struct ValueTask { + ValueChunkWriter* vcw; + uint32_t col_idx; + }; + struct DeviceWriteCtx { + TimeChunkWriter* tcw; + std::vector value_tasks; + std::vector page_boundaries; + uint32_t si; + uint32_t ei; + }; + + const uint32_t page_max_points = + std::max(1, g_config_value_.page_writer_max_point_num_); + + // Phase 1 (serial): schema check + page boundary computation + // for every device. + std::vector device_ctxs; + int start_idx = 0; + for (auto& pair : device_id_end_index_pairs) { + auto device_id = pair.first; + int end_idx = pair.second; + if (end_idx == 0) continue; + + SimpleVector value_chunk_writers; + TimeChunkWriter* time_chunk_writer = nullptr; + if (RET_FAIL(do_check_schema_table(device_id, tablet, + time_chunk_writer, + value_chunk_writers))) { + return ret; } - } - ASSERT(field_columns.size() == value_chunk_writers.size()); - - // Precompute page boundaries from point counts — no serial write - // needed. 
The first segment may be shorter if the time page already - // holds data from a previous write_table call. - const uint32_t page_max_points = std::max( - 1, common::g_config_value_.page_writer_max_point_num_); - const uint32_t si = static_cast(start_idx); - const uint32_t ei = static_cast(end_idx); - if (enforce_recovered_last_time_order_ && device_schema != nullptr && - si < ei && tablet.timestamps_[si] <= device_schema->last_time_) { - return E_OUT_OF_ORDER; - } - // If the current unsealed page is already at or past capacity (from - // a previous write_table call), seal it before starting new segments. - uint32_t time_cur_points = time_chunk_writer->get_point_numer(); - if (time_cur_points >= page_max_points) { - if (time_chunk_writer->has_current_page_data()) { - if (RET_FAIL(time_chunk_writer->seal_current_page())) { - return ret; + // Seal if the current page is already at capacity from a + // previous write_table call. + uint32_t time_cur_points = time_chunk_writer->get_point_numer(); + if (time_cur_points >= page_max_points) { + if (time_chunk_writer->has_current_page_data()) { + if (RET_FAIL(time_chunk_writer->seal_current_page())) { + return ret; + } + } + for (uint32_t k = 0; k < value_chunk_writers.size(); k++) { + if (!IS_NULL(value_chunk_writers[k]) && + value_chunk_writers[k]->has_current_page_data()) { + if (RET_FAIL( + value_chunk_writers[k]->seal_current_page())) { + return ret; + } + } } + time_cur_points = 0; } - for (uint32_t k = 0; k < value_chunk_writers.size(); k++) { - if (!IS_NULL(value_chunk_writers[k]) && - value_chunk_writers[k]->has_current_page_data()) { - if (RET_FAIL(value_chunk_writers[k]->seal_current_page())) { - return ret; + + const uint32_t si = static_cast(start_idx); + const uint32_t ei = static_cast(end_idx); + + // Precompute page boundaries. + const uint32_t first_seg_cap = + (time_cur_points > 0 && time_cur_points < page_max_points) + ? 
(page_max_points - time_cur_points) + : page_max_points; + std::vector page_boundaries; + { + uint32_t pos = si; + uint32_t seg_cap = first_seg_cap; + while (pos < ei) { + uint32_t seg_end = std::min(pos + seg_cap, ei); + if (seg_end < ei) { + page_boundaries.push_back(seg_end); } + pos = seg_end; + seg_cap = page_max_points; } } - time_cur_points = 0; - } - const uint32_t first_seg_cap = - (time_cur_points > 0 && time_cur_points < page_max_points) - ? (page_max_points - time_cur_points) - : page_max_points; - std::vector page_boundaries; // row indices where a page - // should seal - { - uint32_t pos = si; - uint32_t seg_cap = first_seg_cap; - while (pos < ei) { - uint32_t seg_end = std::min(pos + seg_cap, ei); - if (seg_end < ei) { - page_boundaries.push_back(seg_end); + DeviceWriteCtx ctx; + ctx.tcw = time_chunk_writer; + ctx.si = si; + ctx.ei = ei; + ctx.page_boundaries = std::move(page_boundaries); + uint32_t field_col_count = 0; + for (uint32_t i = 0; i < tablet.get_column_count(); ++i) { + if (tablet.column_categories_[i] == + common::ColumnCategory::FIELD) { + ValueChunkWriter* vcw = + value_chunk_writers[field_col_count]; + if (!IS_NULL(vcw)) { + ctx.value_tasks.push_back({vcw, i}); + } + field_col_count++; } - pos = seg_end; - seg_cap = page_max_points; } + device_ctxs.push_back(std::move(ctx)); + start_idx = end_idx; } - // We control page sealing explicitly at precomputed boundaries, so - // auto-seal must be disabled during segmented writes — otherwise a - // segment of exactly page_max_points would trigger auto-seal AND - // our explicit seal, double-sealing (sealing an empty page → crash). - // Note: with auto-seal off, the memory-based threshold - // (page_writer_max_memory_bytes_) is not enforced within a segment. - // For varlen columns (STRING/TEXT/BLOB), individual pages may exceed - // the memory limit. Each segment is still bounded by - // page_max_points rows, keeping pages within a reasonable size. 
- auto write_time_in_segments = [this, &tablet, &page_boundaries, si, - ei](TimeChunkWriter* tcw) -> int { + // Segmented write helpers: write data in segments aligned to + // precomputed page boundaries, with auto-seal disabled to prevent + // double-sealing. + auto write_time_segments = [this, &tablet]( + TimeChunkWriter* tcw, + const std::vector& boundaries, + uint32_t si, uint32_t ei) -> int { int r = E_OK; tcw->set_enable_page_seal_if_full(false); uint32_t seg_start = si; - for (uint32_t boundary : page_boundaries) { - if ((r = time_write_column(tcw, tablet, seg_start, boundary)) != - E_OK) + for (uint32_t boundary : boundaries) { + if ((r = time_write_column_batch(tcw, tablet, seg_start, + boundary)) != E_OK) return r; if ((r = tcw->seal_current_page()) != E_OK) return r; seg_start = boundary; } if (seg_start < ei) { - r = time_write_column(tcw, tablet, seg_start, ei); + r = time_write_column_batch(tcw, tablet, seg_start, ei); } tcw->set_enable_page_seal_if_full(true); return r; }; - auto write_value_in_segments = [this, &tablet, &page_boundaries, si, - ei](ValueChunkWriter* vcw, - uint32_t col_idx) -> int { + auto write_value_segments = [this, &tablet]( + ValueChunkWriter* vcw, uint32_t col_idx, + const std::vector& boundaries, + uint32_t si, uint32_t ei) -> int { int r = E_OK; vcw->set_enable_page_seal_if_full(false); uint32_t seg_start = si; - for (uint32_t boundary : page_boundaries) { - if ((r = value_write_column(vcw, tablet, col_idx, seg_start, - boundary)) != E_OK) + for (uint32_t boundary : boundaries) { + if ((r = value_write_column_batch(vcw, tablet, col_idx, + seg_start, boundary)) != E_OK) return r; if (vcw->has_current_page_data() && (r = vcw->seal_current_page()) != E_OK) @@ -1335,29 +1051,33 @@ int TsFileWriter::write_table(Tablet& tablet) { seg_start = boundary; } if (seg_start < ei) { - r = value_write_column(vcw, tablet, col_idx, seg_start, ei); + r = value_write_column_batch(vcw, tablet, col_idx, seg_start, + ei); } 
vcw->set_enable_page_seal_if_full(true); return r; }; - // All columns (time + values) write the same row segments and seal - // at the same boundaries — fully parallel. + // Phase 2: encode — submit ALL chunk writers across ALL devices + // to the thread pool. Each ChunkWriter is independent. + // Total tasks = n_devices * (1 time + n_field_cols). #ifdef ENABLE_THREADS if (g_config_value_.parallel_write_enabled_) { std::vector> futures; - futures.push_back(g_write_thread_pool_->submit( - [&write_time_in_segments, time_chunk_writer]() { - return write_time_in_segments(time_chunk_writer); - })); - for (uint32_t k = 0; k < value_chunk_writers.size(); k++) { - ValueChunkWriter* vcw = value_chunk_writers[k]; - if (IS_NULL(vcw)) continue; - uint32_t col_idx = field_columns[k]; - futures.push_back(g_write_thread_pool_->submit( - [&write_value_in_segments, vcw, col_idx]() { - return write_value_in_segments(vcw, col_idx); + for (auto& ctx : device_ctxs) { + futures.push_back( + thread_pool_.submit([&write_time_segments, &ctx]() { + return write_time_segments(ctx.tcw, ctx.page_boundaries, + ctx.si, ctx.ei); })); + for (auto& vt : ctx.value_tasks) { + futures.push_back(thread_pool_.submit( + [&write_value_segments, &vt, &ctx]() { + return write_value_segments(vt.vcw, vt.col_idx, + ctx.page_boundaries, + ctx.si, ctx.ei); + })); + } } for (auto& f : futures) { int r = f.get(); @@ -1367,22 +1087,70 @@ int TsFileWriter::write_table(Tablet& tablet) { } else #endif { - if (RET_FAIL(write_time_in_segments(time_chunk_writer))) { - return ret; - } - for (uint32_t k = 0; k < value_chunk_writers.size(); k++) { - ValueChunkWriter* vcw = value_chunk_writers[k]; - if (IS_NULL(vcw)) continue; - if (RET_FAIL(write_value_in_segments(vcw, field_columns[k]))) { + for (auto& ctx : device_ctxs) { + if (RET_FAIL(write_time_segments(ctx.tcw, ctx.page_boundaries, + ctx.si, ctx.ei))) { return ret; } + for (auto& vt : ctx.value_tasks) { + if (RET_FAIL(write_value_segments(vt.vcw, vt.col_idx, + 
ctx.page_boundaries, + ctx.si, ctx.ei))) { + return ret; + } + } } } - if (device_schema != nullptr && si < ei) { - device_schema->last_time_ = - std::max(device_schema->last_time_, tablet.timestamps_[ei - 1]); + } else { + int start_idx = 0; + for (auto& device_id_end_index_pair : device_id_end_index_pairs) { + auto device_id = device_id_end_index_pair.first; + int end_idx = device_id_end_index_pair.second; + if (end_idx == 0) continue; + + MeasurementNamesFromTablet mnames_getter(tablet); + SimpleVector chunk_writers; + SimpleVector data_types; + if (RET_FAIL(do_check_schema(device_id, mnames_getter, + chunk_writers, data_types))) { + return ret; + } + ASSERT(chunk_writers.size() == tablet.get_column_count()); + +#ifdef ENABLE_THREADS + if (chunk_writers.size() >= 2 && + g_config_value_.parallel_write_enabled_) { + const uint32_t si = start_idx; + const uint32_t ei = device_id_end_index_pair.second; + std::vector> futures; + for (uint32_t c = 0; c < chunk_writers.size(); c++) { + ChunkWriter* cw = chunk_writers[c]; + if (IS_NULL(cw)) continue; + futures.push_back( + thread_pool_.submit([this, cw, &tablet, c, si, ei]() { + return write_column_batch(cw, tablet, c, si, ei); + })); + } + for (auto& f : futures) { + int r = f.get(); + if (r != E_OK && ret == E_OK) ret = r; + } + if (ret != E_OK) return ret; + } else +#endif + { + for (uint32_t c = 0; c < chunk_writers.size(); c++) { + ChunkWriter* chunk_writer = chunk_writers[c]; + if (IS_NULL(chunk_writer)) continue; + if (RET_FAIL(write_column_batch( + chunk_writer, tablet, c, start_idx, + device_id_end_index_pair.second))) { + return ret; + } + } + } + start_idx = device_id_end_index_pair.second; } - start_idx = end_idx; } record_count_since_last_flush_ += tablet.cur_row_size_; // Reset string column buffers so the tablet can be reused for the next @@ -1396,14 +1164,13 @@ std::vector, int>> TsFileWriter::split_tablet_by_device(const Tablet& tablet) { std::vector, int>> result; - if 
(tablet.id_column_indexes_.empty()) { + if (tablet.id_column_indexes_.empty() || tablet.single_device_) { + // No tag columns or caller guarantees single device — skip boundary + // detection entirely. auto sentinel = std::make_shared("last_device_id"); result.emplace_back(std::move(sentinel), 0); - std::vector id_array; - id_array.push_back(new std::string(tablet.insert_target_name_)); - auto res = std::make_shared(id_array); - delete id_array[0]; - result.emplace_back(std::move(res), tablet.get_cur_row_size()); + std::shared_ptr dev_id(tablet.get_device_id(0)); + result.emplace_back(std::move(dev_id), tablet.get_cur_row_size()); return result; } @@ -1610,8 +1377,7 @@ int TsFileWriter::write_typed_column(ChunkWriter* chunk_writer, if (LIKELY(!col_notnull_bitmap.test(r))) { common::String val( string_col->buffer + string_col->offsets[r], - static_cast(string_col->offsets[r + 1] - - string_col->offsets[r])); + string_col->offsets[r + 1] - string_col->offsets[r]); if (RET_FAIL(chunk_writer->write(timestamps[r], val))) { return ret; } @@ -1663,8 +1429,7 @@ int TsFileWriter::write_typed_column(ValueChunkWriter* value_chunk_writer, int ret = E_OK; for (uint32_t r = start_idx; r < end_idx; r++) { common::String val(string_col->buffer + string_col->offsets[r], - static_cast(string_col->offsets[r + 1] - - string_col->offsets[r])); + string_col->offsets[r + 1] - string_col->offsets[r]); if (LIKELY(col_notnull_bitmap.test(r))) { if (RET_FAIL(value_chunk_writer->write(timestamps[r], val, true))) { return ret; @@ -1679,6 +1444,149 @@ int TsFileWriter::write_typed_column(ValueChunkWriter* value_chunk_writer, return ret; } +int TsFileWriter::time_write_column_batch(TimeChunkWriter* time_chunk_writer, + const Tablet& tablet, + uint32_t start_idx, + uint32_t end_idx) { + int64_t* timestamps = tablet.timestamps_; + int ret = E_OK; + if (IS_NULL(time_chunk_writer) || IS_NULL(timestamps)) { + return E_INVALID_ARG; + } + end_idx = std::min(end_idx, tablet.max_row_num_); + uint32_t 
count = end_idx - start_idx; + if (count == 0) return ret; + return time_chunk_writer->write_batch(timestamps + start_idx, count); +} + +int TsFileWriter::write_column_batch(ChunkWriter* chunk_writer, + const Tablet& tablet, int col_idx, + uint32_t start_idx, uint32_t end_idx) { + int ret = E_OK; + common::TSDataType data_type = tablet.schema_vec_->at(col_idx).data_type_; + int64_t* timestamps = tablet.timestamps_; + Tablet::ValueMatrixEntry col_values = tablet.value_matrix_[col_idx]; + BitMap& col_notnull_bitmap = tablet.bitmaps_[col_idx]; + end_idx = std::min(end_idx, tablet.max_row_num_); + uint32_t count = end_idx - start_idx; + if (count == 0) return ret; + + bool has_null = false; + if (col_notnull_bitmap.may_have_set_bits()) { + for (uint32_t r = start_idx; r < end_idx; r++) { + if (col_notnull_bitmap.test(r)) { + has_null = true; + break; + } + } + } + + if (!has_null) { + switch (data_type) { + case common::BOOLEAN: + ret = chunk_writer->write_batch( + timestamps + start_idx, col_values.bool_data + start_idx, + count); + break; + case common::INT32: + case common::DATE: + ret = chunk_writer->write_batch( + timestamps + start_idx, col_values.int32_data + start_idx, + count); + break; + case common::INT64: + case common::TIMESTAMP: + ret = chunk_writer->write_batch( + timestamps + start_idx, col_values.int64_data + start_idx, + count); + break; + case common::FLOAT: + ret = chunk_writer->write_batch( + timestamps + start_idx, col_values.float_data + start_idx, + count); + break; + case common::DOUBLE: + ret = chunk_writer->write_batch( + timestamps + start_idx, col_values.double_data + start_idx, + count); + break; + case common::STRING: + case common::TEXT: + case common::BLOB: { + auto* sc = col_values.string_col; + ret = chunk_writer->write_string_batch(timestamps + start_idx, + sc->buffer, sc->offsets, + start_idx, count); + break; + } + default: + ret = write_column(chunk_writer, tablet, col_idx, start_idx, + end_idx); + break; + } + } else { + ret = 
write_column(chunk_writer, tablet, col_idx, start_idx, end_idx); + } + return ret; +} + +int TsFileWriter::value_write_column_batch(ValueChunkWriter* value_chunk_writer, + const Tablet& tablet, int col_idx, + uint32_t start_idx, + uint32_t end_idx) { + int ret = E_OK; + common::TSDataType data_type = tablet.schema_vec_->at(col_idx).data_type_; + int64_t* timestamps = tablet.timestamps_; + Tablet::ValueMatrixEntry col_values = tablet.value_matrix_[col_idx]; + BitMap& col_notnull_bitmap = tablet.bitmaps_[col_idx]; + end_idx = std::min(end_idx, tablet.max_row_num_); + uint32_t count = end_idx - start_idx; + if (count == 0) return ret; + + switch (data_type) { + case common::BOOLEAN: + ret = value_chunk_writer->write_batch( + timestamps, col_values.bool_data, col_notnull_bitmap, start_idx, + count); + break; + case common::DATE: + case common::INT32: + ret = value_chunk_writer->write_batch( + timestamps, col_values.int32_data, col_notnull_bitmap, + start_idx, count); + break; + case common::TIMESTAMP: + case common::INT64: + ret = value_chunk_writer->write_batch( + timestamps, col_values.int64_data, col_notnull_bitmap, + start_idx, count); + break; + case common::FLOAT: + ret = value_chunk_writer->write_batch( + timestamps, col_values.float_data, col_notnull_bitmap, + start_idx, count); + break; + case common::DOUBLE: + ret = value_chunk_writer->write_batch( + timestamps, col_values.double_data, col_notnull_bitmap, + start_idx, count); + break; + case common::STRING: + case common::TEXT: + case common::BLOB: { + auto* sc = col_values.string_col; + ret = value_chunk_writer->write_string_batch( + timestamps, sc->buffer, sc->offsets, col_notnull_bitmap, + start_idx, count); + break; + } + default: + ret = E_NOT_SUPPORT; + break; + } + return ret; +} + // TODO make sure ret is meaningful to SDK user int TsFileWriter::flush() { int ret = E_OK; @@ -1691,9 +1599,10 @@ int TsFileWriter::flush() { /* since @schemas_ used std::map which is rbtree underlying, so map itself is 
ordered by device name. */ + DeviceSchemasMapIter device_iter; for (device_iter = schemas_.begin(); device_iter != schemas_.end(); - device_iter++) { // cppcheck-suppress postfixOperator + device_iter++) { if (check_chunk_group_empty(device_iter->second, device_iter->second->is_aligned_)) { continue; @@ -1707,6 +1616,7 @@ int TsFileWriter::flush() { } else if (RET_FAIL(io_writer_->end_flush_chunk_group(is_aligned))) { } } + record_count_since_last_flush_ = 0; return ret; } @@ -1752,6 +1662,55 @@ bool TsFileWriter::check_chunk_group_empty(MeasurementSchemaGroup* chunk_group, writer->reset(); \ } +// Write already-encoded chunk data to stream (no compression — done earlier). +#define FLUSH_CHUNK_ENCODED(writer, io_writer, name, data_type, encoding, \ + compression, num_pages) \ + if (RET_FAIL(io_writer->start_flush_chunk(writer->get_chunk_data(), name, \ + data_type, encoding, \ + compression, num_pages))) { \ + } else if (RET_FAIL(io_writer->flush_chunk(writer->get_chunk_data()))) { \ + } else if (RET_FAIL(io_writer->end_flush_chunk( \ + writer->get_chunk_statistic()))) { \ + } else { \ + writer->reset(); \ + } + +int TsFileWriter::flush_chunk_group_encoded(MeasurementSchemaGroup* chunk_group, + bool is_aligned) { + int ret = E_OK; + MeasurementSchemaMap& map = chunk_group->measurement_schema_map_; + + if (chunk_group->is_aligned_) { + TimeChunkWriter*& time_chunk_writer = chunk_group->time_chunk_writer_; + ChunkHeader chunk_header = time_chunk_writer->get_chunk_header(); + FLUSH_CHUNK_ENCODED( + time_chunk_writer, io_writer_, chunk_header.measurement_name_, + chunk_header.data_type_, chunk_header.encoding_type_, + chunk_header.compression_type_, time_chunk_writer->num_of_pages()) + } + + for (MeasurementSchemaMapIter ms_iter = map.begin(); ms_iter != map.end(); + ms_iter++) { + MeasurementSchema* m_schema = ms_iter->second; + if (!chunk_group->is_aligned_ && m_schema->chunk_writer_ != nullptr) { + ChunkWriter*& chunk_writer = m_schema->chunk_writer_; + 
FLUSH_CHUNK_ENCODED( + chunk_writer, io_writer_, m_schema->measurement_name_, + m_schema->data_type_, m_schema->encoding_, + m_schema->compression_type_, chunk_writer->num_of_pages()) + } else if (m_schema->value_chunk_writer_ != nullptr) { + ValueChunkWriter*& value_chunk_writer = + m_schema->value_chunk_writer_; + FLUSH_CHUNK_ENCODED( + value_chunk_writer, io_writer_, m_schema->measurement_name_, + m_schema->data_type_, m_schema->encoding_, + m_schema->compression_type_, value_chunk_writer->num_of_pages()) + } + } + + return ret; +} + int TsFileWriter::flush_chunk_group(MeasurementSchemaGroup* chunk_group, bool is_aligned) { int ret = E_OK; diff --git a/cpp/src/writer/tsfile_writer.h b/cpp/src/writer/tsfile_writer.h index a2c8f2842..962a0e8fe 100644 --- a/cpp/src/writer/tsfile_writer.h +++ b/cpp/src/writer/tsfile_writer.h @@ -33,7 +33,9 @@ #include "common/record.h" #include "common/schema.h" #include "common/tablet.h" -#include "utils/util_define.h" // mode_t and other platform-compat shims +#ifdef ENABLE_THREADS +#include "common/thread_pool.h" +#endif namespace storage { class WriteFile; @@ -48,7 +50,6 @@ extern int libtsfile_init(); extern void libtsfile_destroy(); extern void set_page_max_point_count(uint32_t page_max_ponint_count); extern void set_max_degree_of_index_node(uint32_t max_degree_of_index_node); -extern void set_strict_page_size(bool strict_page_size); class TsFileWriter { public: @@ -98,6 +99,7 @@ class TsFileWriter { std::shared_ptr get_table_schema( const std::string& table_name) const; int64_t calculate_mem_size_for_all_group(); + int64_t calculate_meta_mem_size() const; int check_memory_size_and_may_flush_chunks(); /* * Flush buffer to disk file, but do not writer file index part. 
@@ -119,12 +121,9 @@ class TsFileWriter { int write_point_aligned(ValueChunkWriter* value_chunk_writer, int64_t timestamp, common::TSDataType data_type, const DataPoint& point); - int maybe_seal_aligned_pages_together( - TimeChunkWriter* time_chunk_writer, - common::SimpleVector& value_chunk_writers, - int32_t time_pages_before, - const std::vector& value_pages_before); int flush_chunk_group(MeasurementSchemaGroup* chunk_group, bool is_aligned); + int flush_chunk_group_encoded(MeasurementSchemaGroup* chunk_group, + bool is_aligned); int write_typed_column(storage::ChunkWriter* chunk_writer, int64_t* timestamps, bool* col_values, @@ -196,7 +195,11 @@ class TsFileWriter { int64_t record_count_for_next_mem_check_; bool write_file_created_; bool io_writer_owned_; // false when init(RestorableTsFileIOWriter*) - bool enforce_recovered_last_time_order_; + bool table_aligned_ = true; +#ifdef ENABLE_THREADS + common::ThreadPool thread_pool_{ + (size_t)common::g_config_value_.write_thread_count_}; +#endif int write_typed_column(ValueChunkWriter* value_chunk_writer, int64_t* timestamps, bool* col_values, @@ -231,6 +234,16 @@ class TsFileWriter { int value_write_column(ValueChunkWriter* value_chunk_writer, const Tablet& tablet, int col_idx, uint32_t start_idx, uint32_t end_idx); + + int write_column_batch(storage::ChunkWriter* chunk_writer, + const Tablet& tablet, int col_idx, + uint32_t start_idx, uint32_t end_idx); + int time_write_column_batch(TimeChunkWriter* time_chunk_writer, + const Tablet& tablet, uint32_t start_idx, + uint32_t end_idx); + int value_write_column_batch(ValueChunkWriter* value_chunk_writer, + const Tablet& tablet, int col_idx, + uint32_t start_idx, uint32_t end_idx); }; } // end namespace storage diff --git a/cpp/src/writer/value_chunk_writer.cc b/cpp/src/writer/value_chunk_writer.cc index a59cf8d3f..fcaf87e90 100644 --- a/cpp/src/writer/value_chunk_writer.cc +++ b/cpp/src/writer/value_chunk_writer.cc @@ -110,7 +110,7 @@ int 
ValueChunkWriter::seal_cur_page(bool end_chunk) { /*stat*/ false, /*data*/ false); if (IS_SUCC(ret)) { save_first_page_data(value_page_writer_); - value_page_writer_.clear_page_data(); + // value_page_writer_.destroy_page_data(); value_page_writer_.reset(); } } @@ -145,6 +145,11 @@ void ValueChunkWriter::save_first_page_data( ValuePageWriter& first_page_writer) { first_page_data_ = first_page_writer.get_cur_page_data(); first_page_statistic_->deep_copy_from(first_page_writer.get_statistic()); + // Take ownership of the heap buffers: get_cur_page_data() returned a + // shallow copy, so without this we'd alias compressed_buf_ / + // uncompressed_buf_ between cur_page_data_ and first_page_data_ and + // double-free at destroy() time. + first_page_writer.release_cur_page_data(); } int ValueChunkWriter::write_first_page_data(ByteStream& pages_data, @@ -161,8 +166,7 @@ int ValueChunkWriter::write_first_page_data(ByteStream& pages_data, int ValueChunkWriter::end_encode_chunk() { int ret = E_OK; - if (value_page_writer_.get_point_numer() > 0 || - (has_current_page_data() && num_of_pages_ == 0)) { + if (value_page_writer_.get_statistic()->count_ > 0) { ret = seal_cur_page(/*end_chunk*/ true); if (E_OK == ret) { chunk_header_.data_size_ = chunk_data_.total_size(); @@ -175,9 +179,6 @@ int ValueChunkWriter::end_encode_chunk() { chunk_header_.data_size_ = chunk_data_.total_size(); chunk_header_.num_of_pages_ = num_of_pages_; } - } else if (num_of_pages_ > 0) { - chunk_header_.data_size_ = chunk_data_.total_size(); - chunk_header_.num_of_pages_ = num_of_pages_; } #if DEBUG_SE std::cout << "end_encode_chunk: num_of_pages_=" << num_of_pages_ @@ -197,7 +198,9 @@ int64_t ValueChunkWriter::estimate_max_series_mem_size() { } bool ValueChunkWriter::hasData() { - return num_of_pages_ > 0 || has_current_page_data(); + return num_of_pages_ > 0 || + (value_page_writer_.get_statistic() != nullptr && + value_page_writer_.get_statistic()->count_ > 0); } } // end namespace storage diff --git 
a/cpp/src/writer/value_chunk_writer.h b/cpp/src/writer/value_chunk_writer.h index 64eb4cc50..b4373fa68 100644 --- a/cpp/src/writer/value_chunk_writer.h +++ b/cpp/src/writer/value_chunk_writer.h @@ -53,8 +53,7 @@ class ValueChunkWriter { first_page_data_(), first_page_statistic_(nullptr), chunk_header_(), - num_of_pages_(0), - enable_page_seal_if_full_(true) {} + num_of_pages_(0) {} ~ValueChunkWriter() { destroy(); } int init(const common::ColumnSchema& col_schema); int init(const std::string& measurement_name, common::TSDataType data_type, @@ -110,6 +109,68 @@ class ValueChunkWriter { VCW_DO_WRITE_FOR_TYPE(isnull); } + template + int write_batch(const int64_t* timestamps, const T* values, + const common::BitMap& col_notnull_bitmap, + uint32_t start_idx, uint32_t count) { + int ret = common::E_OK; + uint32_t offset = 0; + while (offset < count) { + uint32_t cur_points = value_page_writer_.get_point_numer(); + uint32_t page_remaining = + common::g_config_value_.page_writer_max_point_num_ - cur_points; + if (page_remaining == 0) { + if (RET_FAIL(seal_cur_page(false))) { + return ret; + } + page_remaining = + common::g_config_value_.page_writer_max_point_num_; + } + uint32_t batch_size = std::min(count - offset, page_remaining); + if (RET_FAIL(value_page_writer_.write_batch( + timestamps, values, col_notnull_bitmap, start_idx + offset, + batch_size))) { + return ret; + } + offset += batch_size; + if (RET_FAIL(seal_cur_page_if_full())) { + return ret; + } + } + return ret; + } + + int write_string_batch(const int64_t* timestamps, const char* buffer, + const uint32_t* offsets, + const common::BitMap& col_notnull_bitmap, + uint32_t start_idx, uint32_t count) { + int ret = common::E_OK; + uint32_t offset = 0; + while (offset < count) { + uint32_t cur_points = value_page_writer_.get_point_numer(); + uint32_t page_remaining = + common::g_config_value_.page_writer_max_point_num_ - cur_points; + if (page_remaining == 0) { + if (RET_FAIL(seal_cur_page(false))) { + return ret; + 
} + page_remaining = + common::g_config_value_.page_writer_max_point_num_; + } + uint32_t batch_size = std::min(count - offset, page_remaining); + if (RET_FAIL(value_page_writer_.write_string_batch( + timestamps, buffer, offsets, col_notnull_bitmap, + start_idx + offset, batch_size))) { + return ret; + } + offset += batch_size; + if (RET_FAIL(seal_cur_page_if_full())) { + return ret; + } + } + return ret; + } + int end_encode_chunk(); common::ByteStream& get_chunk_data() { return chunk_data_; } Statistic* get_chunk_statistic() { return chunk_statistic_; } @@ -119,8 +180,8 @@ class ValueChunkWriter { bool hasData(); - /** True if the current (unsealed) page has at least one write (including - * nulls). */ + /** True if the current (unsealed) page has at least one write + * (including NULLs). */ bool has_current_page_data() const { return value_page_writer_.get_total_write_count() > 0; } @@ -129,15 +190,11 @@ class ValueChunkWriter { return value_page_writer_.get_point_numer(); } - /** - * Force seal the current page (for aligned table model: when time page - * seals due to memory/point threshold, all value pages must seal together). - * @return E_OK on success. - */ + /** Force seal the current page. */ int seal_current_page() { return seal_cur_page(false); } - // For aligned writer: allow disabling the automatic page-size/point-number - // check so the caller can seal pages at chosen boundaries. + // Allow disabling the automatic page-size/point-number check so the + // caller can seal pages at chosen boundaries. FORCE_INLINE void set_enable_page_seal_if_full(bool enable) { enable_page_seal_if_full_ = enable; } @@ -183,8 +240,7 @@ class ValueChunkWriter { ChunkHeader chunk_header_; int32_t num_of_pages_; - // If false, write() won't auto-seal when the current page becomes full. 
- bool enable_page_seal_if_full_; + bool enable_page_seal_if_full_ = true; }; } // end namespace storage diff --git a/cpp/src/writer/value_page_writer.cc b/cpp/src/writer/value_page_writer.cc index 1c8f05350..feedb1870 100644 --- a/cpp/src/writer/value_page_writer.cc +++ b/cpp/src/writer/value_page_writer.cc @@ -43,7 +43,7 @@ int ValuePageData::init(ByteStream& col_notnull_bitmap_bs, ByteStream& value_bs, if (IS_NULL(uncompressed_buf_)) { return E_OOM; } - if (col_notnull_bitmap_buf_size_ == 0) { + if (col_notnull_bitmap_buf_size_ == 0 || value_buf_size_ == 0) { return E_INVALID_ARG; } uncompressed_buf_[0] = (unsigned char)((size >> 24) & 0xFF); @@ -54,11 +54,11 @@ int ValuePageData::init(ByteStream& col_notnull_bitmap_bs, ByteStream& value_bs, if (RET_FAIL(common::copy_bs_to_buf(col_notnull_bitmap_bs, uncompressed_buf_ + sizeof(size), col_notnull_bitmap_buf_size_))) { - } else if (value_buf_size_ > 0 && RET_FAIL(common::copy_bs_to_buf( - value_bs, - uncompressed_buf_ + sizeof(size) + - col_notnull_bitmap_buf_size_, - value_buf_size_))) { + } else if (RET_FAIL(common::copy_bs_to_buf(value_bs, + uncompressed_buf_ + + sizeof(size) + + col_notnull_bitmap_buf_size_, + value_buf_size_))) { } else { // TODO // NOTE: different compressor may have different compress API diff --git a/cpp/src/writer/value_page_writer.h b/cpp/src/writer/value_page_writer.h index 97f8a5f0d..1eb05fa87 100644 --- a/cpp/src/writer/value_page_writer.h +++ b/cpp/src/writer/value_page_writer.h @@ -51,6 +51,7 @@ struct ValuePageData { common::ByteStream& value_bs, Compressor* compressor, uint32_t size); void destroy() { + // Be careful about the memory if (uncompressed_buf_ != nullptr) { common::mem_free(uncompressed_buf_); uncompressed_buf_ = nullptr; @@ -59,19 +60,6 @@ struct ValuePageData { compressor_->after_compress(compressed_buf_); compressed_buf_ = nullptr; } - compressor_ = nullptr; - } - - /** Clear pointers without freeing (transfer ownership to another holder). 
- */ - void clear() { - col_notnull_bitmap_buf_size_ = 0; - value_buf_size_ = 0; - uncompressed_size_ = 0; - compressed_size_ = 0; - uncompressed_buf_ = nullptr; - compressed_buf_ = nullptr; - compressor_ = nullptr; } }; @@ -163,6 +151,124 @@ class ValuePageWriter { VPW_DO_WRITE_FOR_TYPE(isnull); } + // Batch write for aligned/table model. + // In the tablet bitmap: bit=1 means null, bit=0 means not null. + // In VPW_DO_WRITE_FOR_TYPE: ISNULL=true skips encoding. + // So: tablet bitmap.test(r)=true -> isnull=true (null value) + // tablet bitmap.test(r)=false -> isnull=false (valid value) + template + int write_batch(const int64_t* timestamps, const T* values, + const common::BitMap& col_notnull_bitmap, + uint32_t start_idx, uint32_t count) { + int ret = common::E_OK; + if (count == 0) return ret; + + uint32_t valid_count = 0; + for (uint32_t i = 0; i < count; i++) { + uint32_t row = start_idx + i; + if ((size_ / 8) + 1 > col_notnull_bitmap_.size()) { + col_notnull_bitmap_.push_back(0); + } + // bit=1 in tablet bitmap means null; bit=0 means not null + bool is_null = + const_cast(col_notnull_bitmap).test(row); + if (!is_null) { + // Mark as not-null in page bitmap + col_notnull_bitmap_[size_ / 8] |= (MASK >> (size_ % 8)); + valid_count++; + } + size_++; + } + + if (valid_count == 0) return ret; + + // If all values are valid, we can encode the batch directly + if (valid_count == count) { + if (RET_FAIL(value_encoder_->encode_batch(values + start_idx, count, + value_out_stream_))) { + return ret; + } + statistic_->update_batch(timestamps + start_idx, values + start_idx, + count); + } else { + // Encode only non-null values one by one + for (uint32_t i = 0; i < count; i++) { + uint32_t row = start_idx + i; + if (!const_cast(col_notnull_bitmap) + .test(row)) { + if (RET_FAIL(value_encoder_->encode(values[row], + value_out_stream_))) { + return ret; + } + statistic_->update(timestamps[row], values[row]); + } + } + } + return ret; + } + + // Batch write strings from 
Arrow-style offset+buffer layout with null + // bitmap. + int write_string_batch(const int64_t* timestamps, const char* buffer, + const uint32_t* offsets, + const common::BitMap& col_notnull_bitmap, + uint32_t start_idx, uint32_t count) { + int ret = common::E_OK; + if (count == 0) return ret; + + // Phase 1: bitmap + count valid rows + uint32_t valid_count = 0; + for (uint32_t i = 0; i < count; i++) { + uint32_t row = start_idx + i; + if ((size_ / 8) + 1 > col_notnull_bitmap_.size()) { + col_notnull_bitmap_.push_back(0); + } + bool is_null = + const_cast(col_notnull_bitmap).test(row); + if (!is_null) { + col_notnull_bitmap_[size_ / 8] |= (MASK >> (size_ % 8)); + valid_count++; + } + size_++; + } + + if (valid_count == 0) return ret; + + // Phase 2: encode non-null strings + if (valid_count == count) { + // All valid — batch encode directly + if (RET_FAIL(value_encoder_->encode_string_batch( + buffer, offsets, start_idx, count, value_out_stream_))) { + return ret; + } + } else { + // Mixed — encode only non-null strings one by one + for (uint32_t i = 0; i < count; i++) { + uint32_t row = start_idx + i; + if (!const_cast(col_notnull_bitmap) + .test(row)) { + uint32_t len = offsets[row + 1] - offsets[row]; + common::String val(buffer + offsets[row], len); + if (RET_FAIL( + value_encoder_->encode(val, value_out_stream_))) { + return ret; + } + } + } + } + + // Phase 3: update statistics for non-null rows + for (uint32_t i = 0; i < count; i++) { + uint32_t row = start_idx + i; + if (!const_cast(col_notnull_bitmap).test(row)) { + uint32_t len = offsets[row + 1] - offsets[row]; + common::String val(buffer + offsets[row], len); + statistic_->update(timestamps[row], val); + } + } + return ret; + } + FORCE_INLINE uint32_t get_point_numer() const { return statistic_->count_; } FORCE_INLINE uint32_t get_total_write_count() const { return size_; } FORCE_INLINE uint32_t get_col_notnull_bitmap_out_stream_size() const { @@ -195,9 +301,16 @@ class ValuePageWriter { } FORCE_INLINE 
Statistic* get_statistic() { return statistic_; } ValuePageData get_cur_page_data() { return cur_page_data_; } + // Transfer ownership of cur_page_data_'s heap buffers (uncompressed_buf_ + // and compressed_buf_) out of this writer. Callers use this together with + // get_cur_page_data() to keep a long-lived copy of the data (e.g. as the + // first-page snapshot) without leaving an alias here that would cause a + // double free on destroy. + void release_cur_page_data() { + cur_page_data_.uncompressed_buf_ = nullptr; + cur_page_data_.compressed_buf_ = nullptr; + } void destroy_page_data() { cur_page_data_.destroy(); } - /** Clear cur_page_data_ without freeing (after ownership transferred). */ - void clear_page_data() { cur_page_data_.clear(); } private: FORCE_INLINE int prepare_end_page() { @@ -214,7 +327,7 @@ class ValuePageWriter { common::ByteStream& pages_data); private: - static const uint32_t OUT_STREAM_PAGE_SIZE = 1024; + static const uint32_t OUT_STREAM_PAGE_SIZE = 65536; private: common::TSDataType data_type_; @@ -229,7 +342,7 @@ class ValuePageWriter { std::vector col_notnull_bitmap_; uint32_t size_; - static TSFILE_API uint32_t MASK; + static uint32_t MASK; }; } // end namespace storage diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index f5d084f8f..2be9c1b2c 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -108,17 +108,12 @@ file(GLOB_RECURSE TEST_SRCS "encoding/*_test.cc" "utils/*_test.cc" "file/*_test.cc" + "parser/*_test.cc" "reader/*_test.cc" "writer/*_test.cc" "cwrapper/*_test.cc" ) -# Parser tests depend on the ANTLR4 runtime; only build them when it is enabled. 
-if (ENABLE_ANTLR4) - file(GLOB_RECURSE PARSER_TEST_SRCS "parser/*_test.cc") - list(APPEND TEST_SRCS ${PARSER_TEST_SRCS}) -endif() - if (ENABLE_SNAPPY) file(GLOB_RECURSE SNAPPY_TEST_SRCS "compress/*snappy*_test.cc") list(APPEND TEST_SRCS ${SNAPPY_TEST_SRCS}) @@ -176,13 +171,8 @@ include(GoogleTest) # On Windows, delay test discovery until ctest runs (PRE_TEST) so the test exe # runs with the correct env (e.g. PATH has MinGW, libtsfile.dll is present). # Avoids 0xc0000139 when discovery runs at build time. -# -# DISCOVERY_TIMEOUT is raised well above the 5s default: the first execution of -# the freshly-built test executable can be delayed by on-access antivirus -# scanning (e.g. Windows Defender), which otherwise trips a spurious -# "Process terminated due to timeout" while gtest_discover_tests enumerates it. if(WIN32) - gtest_discover_tests(TsFile_Test DISCOVERY_MODE PRE_TEST DISCOVERY_TIMEOUT 120) + gtest_discover_tests(TsFile_Test DISCOVERY_MODE PRE_TEST) else() gtest_discover_tests(TsFile_Test) endif() \ No newline at end of file diff --git a/cpp/test/common/allocator/byte_stream_test.cc b/cpp/test/common/allocator/byte_stream_test.cc index b211803c3..6296e3a5d 100644 --- a/cpp/test/common/allocator/byte_stream_test.cc +++ b/cpp/test/common/allocator/byte_stream_test.cc @@ -87,8 +87,7 @@ TEST_F(ByteStreamTest, WriteReadLargeQuantities) { write_to_stream(&data, 1); } - // 1 MiB buffer: keep it off the stack (MSVC's default stack is only 1 MiB). 
- static uint8_t read_buffer[1024 * 1024]; + uint8_t read_buffer[1024 * 1024]; for (int i = 0; i < 1024 * 1024; i++) { uint32_t read_len = 0; read_from_stream(read_buffer + i, 1, read_len); diff --git a/cpp/test/common/device_id_test.cc b/cpp/test/common/device_id_test.cc index f3877c278..a72bd2889 100644 --- a/cpp/test/common/device_id_test.cc +++ b/cpp/test/common/device_id_test.cc @@ -31,16 +31,6 @@ TEST(DeviceIdTest, NormalTest) { ASSERT_EQ("root.db.tb.device1", device_id.get_device_name()); } -TEST(DeviceIdTest, DeviceIdStringFallbackSemantic) { - std::string device_id_string = "root.sg1.FeederA"; - StringArrayDeviceID device_id = StringArrayDeviceID(device_id_string); - - // For a 3-level identifier, table name should be merged as "root.sg1". - ASSERT_EQ("root.sg1", device_id.get_table_name()); - ASSERT_EQ(2, device_id.segment_num()); - ASSERT_EQ("root.sg1.FeederA", device_id.get_device_name()); -} - TEST(DeviceIdTest, TabletDeviceId) { std::vector measurement_types{ TSDataType::STRING, TSDataType::STRING, TSDataType::STRING, diff --git a/cpp/test/common/tsblock/arrow_tsblock_test.cc b/cpp/test/common/tsblock/arrow_tsblock_test.cc index 348c18a4a..123efb59f 100644 --- a/cpp/test/common/tsblock/arrow_tsblock_test.cc +++ b/cpp/test/common/tsblock/arrow_tsblock_test.cc @@ -20,7 +20,6 @@ #include -#include "common/tablet.h" #include "common/tsblock/tsblock.h" #include "cwrapper/tsfile_cwrapper.h" #include "utils/db_utils.h" @@ -35,13 +34,9 @@ using ArrowSchema = ::ArrowSchema; #define ARROW_FLAG_NULLABLE 2 #define ARROW_FLAG_MAP_KEYS_SORTED 4 -// Function declarations (defined in arrow_c.cc) +// Function declaration (defined in arrow_c.cc) int TsBlockToArrowStruct(common::TsBlock& tsblock, ArrowArray* out_array, ArrowSchema* out_schema); -int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array, - const ArrowSchema* in_schema, - const storage::TableSchema* reg_schema, - storage::Tablet** out_tablet, int time_col_index); } // namespace arrow 
static void VerifyArrowSchema( @@ -337,152 +332,3 @@ TEST(ArrowTsBlockTest, TsBlock_EdgeCases) { } } } - -// Test ArrowStructToTablet with sliced Arrow arrays (offset > 0). -// Full arrays have 5 rows; offset=2 on every child means only rows [2..4] -// (3 rows) are consumed. Row index 3 in the full array (local index 1 in the -// slice) carries a null in the INT32 column. -TEST(ArrowStructToTabletTest, SlicedArray_WithOffset) { - // --- timestamps (int64, no nulls) --- - int64_t ts_data[5] = {1000, 1001, 1002, 1003, 1004}; - const void* ts_bufs[2] = {nullptr, ts_data}; - ArrowArray ts_arr = {}; - ts_arr.length = 3; - ts_arr.offset = 2; - ts_arr.null_count = 0; - ts_arr.n_buffers = 2; - ts_arr.buffers = ts_bufs; - - ArrowSchema ts_schema = {}; - ts_schema.format = "l"; - ts_schema.name = "time"; - ts_schema.flags = ARROW_FLAG_NULLABLE; - - // --- INT32 column: values [100..104], row 3 (global) = local row 1 null - // Arrow validity bitmap: bit=1 means valid. - // bits 0,1,2,4=valid, bit 3=null → byte 0 = 0b00010111 = 0x17 - int32_t int_data[5] = {100, 101, 102, 103, 104}; - uint8_t int_validity[1] = {0x17}; - const void* int_bufs[2] = {int_validity, int_data}; - ArrowArray int_arr = {}; - int_arr.length = 3; - int_arr.offset = 2; - int_arr.null_count = 1; - int_arr.n_buffers = 2; - int_arr.buffers = int_bufs; - - ArrowSchema int_schema = {}; - int_schema.format = "i"; - int_schema.name = "int_col"; - int_schema.flags = ARROW_FLAG_NULLABLE; - - // --- DOUBLE column: values [10.0..14.0], no nulls --- - double dbl_data[5] = {10.0, 11.0, 12.0, 13.0, 14.0}; - const void* dbl_bufs[2] = {nullptr, dbl_data}; - ArrowArray dbl_arr = {}; - dbl_arr.length = 3; - dbl_arr.offset = 2; - dbl_arr.null_count = 0; - dbl_arr.n_buffers = 2; - dbl_arr.buffers = dbl_bufs; - - ArrowSchema dbl_schema = {}; - dbl_schema.format = "g"; - dbl_schema.name = "dbl_col"; - dbl_schema.flags = ARROW_FLAG_NULLABLE; - - // --- UTF-8 string column: "str0".."str4", no nulls --- - // With offset=2, the 
slice covers "str2","str3","str4". - const char str_chars[] = "str0str1str2str3str4"; - int32_t str_offs[6] = {0, 4, 8, 12, 16, 20}; - const void* str_bufs[3] = {nullptr, str_offs, str_chars}; - ArrowArray str_arr = {}; - str_arr.length = 3; - str_arr.offset = 2; - str_arr.null_count = 0; - str_arr.n_buffers = 3; - str_arr.buffers = str_bufs; - - ArrowSchema str_schema = {}; - str_schema.format = "u"; - str_schema.name = "str_col"; - str_schema.flags = ARROW_FLAG_NULLABLE; - - // --- parent struct array --- - ArrowArray* children[4] = {&ts_arr, &int_arr, &dbl_arr, &str_arr}; - ArrowArray parent = {}; - parent.length = 3; - parent.n_buffers = 0; - parent.n_children = 4; - parent.children = children; - - ArrowSchema* child_schemas[4] = {&ts_schema, &int_schema, &dbl_schema, - &str_schema}; - ArrowSchema parent_schema = {}; - parent_schema.format = "+s"; - parent_schema.n_children = 4; - parent_schema.children = child_schemas; - - storage::Tablet* tablet = nullptr; - // time_col_index=0 → timestamp from ts_arr; data cols are int, dbl, str - int ret = arrow::ArrowStructToTablet("test_table", &parent, &parent_schema, - nullptr, &tablet, 0); - ASSERT_EQ(ret, common::E_OK); - ASSERT_NE(tablet, nullptr); - - EXPECT_EQ(tablet->get_cur_row_size(), 3u); - - common::TSDataType dtype; - void* v; - - // INT32 col (schema_index=0): local rows 0,1,2 → 102, null, 104 - v = tablet->get_value(0, 0, dtype); - ASSERT_NE(v, nullptr); - EXPECT_EQ(*static_cast(v), 102); - - v = tablet->get_value(1, 0, dtype); - EXPECT_EQ(v, nullptr); // row 3 in original data is null - - v = tablet->get_value(2, 0, dtype); - ASSERT_NE(v, nullptr); - EXPECT_EQ(*static_cast(v), 104); - - // DOUBLE col (schema_index=1): local rows 0,1,2 → 12.0, 13.0, 14.0 - v = tablet->get_value(0, 1, dtype); - ASSERT_NE(v, nullptr); - EXPECT_DOUBLE_EQ(*static_cast(v), 12.0); - - v = tablet->get_value(1, 1, dtype); - ASSERT_NE(v, nullptr); - EXPECT_DOUBLE_EQ(*static_cast(v), 13.0); - - v = tablet->get_value(2, 1, dtype); - 
ASSERT_NE(v, nullptr); - EXPECT_DOUBLE_EQ(*static_cast(v), 14.0); - - // STRING col (schema_index=2): local rows 0,1,2 → "str2","str3","str4" - // Arrow "u" maps to common::TEXT; offset normalization in arrow_c.cc - // ensures offsets[0]==0 before calling set_column_string_values. - v = tablet->get_value(0, 2, dtype); - ASSERT_NE(v, nullptr); - { - common::String* s = static_cast(v); - EXPECT_EQ(std::string(s->buf_, s->len_), "str2"); - } - - v = tablet->get_value(1, 2, dtype); - ASSERT_NE(v, nullptr); - { - common::String* s = static_cast(v); - EXPECT_EQ(std::string(s->buf_, s->len_), "str3"); - } - - v = tablet->get_value(2, 2, dtype); - ASSERT_NE(v, nullptr); - { - common::String* s = static_cast(v); - EXPECT_EQ(std::string(s->buf_, s->len_), "str4"); - } - - delete tablet; -} diff --git a/cpp/test/cwrapper/c_release_test.cc b/cpp/test/cwrapper/c_release_test.cc index 375c7e115..751f9f9ea 100644 --- a/cpp/test/cwrapper/c_release_test.cc +++ b/cpp/test/cwrapper/c_release_test.cc @@ -18,11 +18,7 @@ */ #include -#ifdef _WIN32 -#include -#else #include -#endif #include #include @@ -40,6 +36,7 @@ class CReleaseTest : public testing::Test {}; TEST_F(CReleaseTest, TestCreateFile) { ERRNO error_no = RET_OK; + remove("create_file1.tsfile"); // Create File and Get RET_OK WriteFile file = write_file_new("create_file1.tsfile", &error_no); ASSERT_EQ(RET_OK, error_no); @@ -52,8 +49,7 @@ TEST_F(CReleaseTest, TestCreateFile) { // Folder file = write_file_new("test/", &error_no); - ASSERT_TRUE(error_no == RET_FILRET_OPEN_ERR || - error_no == RET_ALREADY_EXIST); + ASSERT_EQ(RET_FILRET_OPEN_ERR, error_no); remove("create_file1.tsfile"); free_write_file(&file); @@ -101,9 +97,13 @@ TEST_F(CReleaseTest, TsFileWriterNew) { table_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 2)); table_schema.column_schemas[0] = - ColumnSchema{strdup("col1"), TS_DATATYPE_STRING, TAG}; + (ColumnSchema){.column_name = strdup("col1"), + .data_type = TS_DATATYPE_STRING, + 
.column_category = TAG}; table_schema.column_schemas[1] = - ColumnSchema{strdup("col2"), TS_DATATYPE_INT32, FIELD}; + (ColumnSchema){.column_name = strdup("col2"), + .data_type = TS_DATATYPE_INT32, + .column_category = FIELD}; writer = tsfile_writer_new(file, &table_schema, &error_code); ASSERT_EQ(RET_OK, error_code); @@ -128,15 +128,21 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { abnormal_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 4)); abnormal_schema.column_schemas[0] = - ColumnSchema{strdup("!@#$%^*()_+-="), TS_DATATYPE_STRING, TAG}; + (ColumnSchema){.column_name = strdup("!@#$%^*()_+-="), + .data_type = TS_DATATYPE_STRING, + .column_category = TAG}; // TAG's datatype is not correct abnormal_schema.column_schemas[1] = - ColumnSchema{strdup("TAG2"), TS_DATATYPE_INT32, TAG}; + (ColumnSchema){.column_name = strdup("TAG2"), + .data_type = TS_DATATYPE_INT32, + .column_category = TAG}; // same column name with column[0] abnormal_schema.column_schemas[2] = - ColumnSchema{strdup("!@#$%^*()_+-="), TS_DATATYPE_DOUBLE, FIELD}; + (ColumnSchema){.column_name = strdup("!@#$%^*()_+-="), + .data_type = TS_DATATYPE_DOUBLE, + .column_category = FIELD}; // column name conflict TsFileWriter writer = @@ -145,7 +151,9 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { free(abnormal_schema.column_schemas[2].column_name); abnormal_schema.column_schemas[2] = - ColumnSchema{strdup("!@#$%^*()_+-=1"), TS_DATATYPE_DOUBLE, FIELD}; + (ColumnSchema){.column_name = strdup("!@#$%^*()_+-=1"), + .data_type = TS_DATATYPE_DOUBLE, + .column_category = FIELD}; // datatype conflict writer = tsfile_writer_new(file, &abnormal_schema, &error_code); @@ -153,7 +161,9 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { free(abnormal_schema.column_schemas[1].column_name); abnormal_schema.column_schemas[1] = - ColumnSchema{strdup("TAG2"), TS_DATATYPE_STRING, TAG}; + (ColumnSchema){.column_name = strdup("TAG2"), + .data_type = TS_DATATYPE_STRING, + 
.column_category = TAG}; writer = tsfile_writer_new(file, &abnormal_schema, &error_code); ASSERT_EQ(RET_OK, error_code); @@ -225,17 +235,29 @@ TEST_F(CReleaseTest, TsFileWriterMultiDataType) { all_type_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 6)); all_type_schema.column_schemas[0] = - ColumnSchema{strdup("TAG"), TS_DATATYPE_STRING, TAG}; + (ColumnSchema){.column_name = strdup("TAG"), + .data_type = TS_DATATYPE_STRING, + .column_category = TAG}; all_type_schema.column_schemas[1] = - ColumnSchema{strdup("INT32"), TS_DATATYPE_INT32, FIELD}; + (ColumnSchema){.column_name = strdup("INT32"), + .data_type = TS_DATATYPE_INT32, + .column_category = FIELD}; all_type_schema.column_schemas[2] = - ColumnSchema{strdup("INT64"), TS_DATATYPE_INT64, FIELD}; + (ColumnSchema){.column_name = strdup("INT64"), + .data_type = TS_DATATYPE_INT64, + .column_category = FIELD}; all_type_schema.column_schemas[3] = - ColumnSchema{strdup("FLOAT"), TS_DATATYPE_FLOAT, FIELD}; + (ColumnSchema){.column_name = strdup("FLOAT"), + .data_type = TS_DATATYPE_FLOAT, + .column_category = FIELD}; all_type_schema.column_schemas[4] = - ColumnSchema{strdup("DOUBLE"), TS_DATATYPE_DOUBLE, FIELD}; + (ColumnSchema){.column_name = strdup("DOUBLE"), + .data_type = TS_DATATYPE_DOUBLE, + .column_category = FIELD}; all_type_schema.column_schemas[5] = - ColumnSchema{strdup("BOOLEAN"), TS_DATATYPE_BOOLEAN, FIELD}; + (ColumnSchema){.column_name = strdup("BOOLEAN"), + .data_type = TS_DATATYPE_BOOLEAN, + .column_category = FIELD}; TsFileWriter writer = tsfile_writer_new(file, &all_type_schema, &error_code); @@ -388,4 +410,4 @@ TEST_F(CReleaseTest, TsFileWriterConfTest) { remove("plain_file.tsfile"); } -} // namespace CReleaseTest \ No newline at end of file +} // namespace CReleaseTest diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc deleted file mode 100644 index 4595770a6..000000000 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ /dev/null 
@@ -1,294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#ifdef _WIN32 -#include -#else -#include -#endif - -#include -#include - -extern "C" { -#include "cwrapper/errno_define_c.h" -#include "cwrapper/tsfile_cwrapper.h" -} - -namespace cwrapper_metadata { - -class CWrapperMetadataTest : public testing::Test {}; - -TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { - ERRNO code = RET_OK; - const char* filename = "cwrapper_metadata_stat.tsfile"; - remove(filename); - - const char* device = "root.sg.d1"; - char* m_int = strdup("s_int"); - timeseries_schema sch{}; - sch.timeseries_name = m_int; - sch.data_type = TS_DATATYPE_INT32; - sch.encoding = TS_ENCODING_PLAIN; - sch.compression = TS_COMPRESSION_UNCOMPRESSED; - - auto* writer = static_cast( - _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); - ASSERT_EQ(RET_OK, code); - ASSERT_EQ(RET_OK, _tsfile_writer_register_timeseries(writer, device, &sch)); - - for (int row = 0; row < 3; row++) { - auto* record = static_cast( - _ts_record_new(device, static_cast(row + 1), 1)); - const int32_t v = static_cast((row + 1) * 10); - ASSERT_EQ(RET_OK, _insert_data_into_ts_record_by_name_int32_t( - record, m_int, v)); - 
ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); - _free_tsfile_ts_record(reinterpret_cast(&record)); - } - ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); - - TsFileReader reader = tsfile_reader_new(filename, &code); - ASSERT_EQ(RET_OK, code); - ASSERT_NE(nullptr, reader); - - DeviceID* details = nullptr; - uint32_t n_det = 0; - ASSERT_EQ(RET_OK, tsfile_reader_get_all_devices(reader, &details, &n_det)); - ASSERT_EQ(1u, n_det); - ASSERT_NE(nullptr, details); - ASSERT_STREQ(device, details[0].path); - ASSERT_NE(nullptr, details[0].table_name); - EXPECT_STREQ("root.sg", details[0].table_name); - EXPECT_EQ(2u, details[0].segment_count); - ASSERT_NE(nullptr, details[0].segments); - EXPECT_STREQ("root.sg", details[0].segments[0]); - EXPECT_STREQ("d1", details[0].segments[1]); - tsfile_free_device_id_array(details, n_det); - - DeviceTimeseriesMetadataMap map{}; - ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map)); - ASSERT_EQ(1u, map.device_count); - ASSERT_NE(nullptr, map.entries); - ASSERT_STREQ(device, map.entries[0].device.path); - ASSERT_NE(nullptr, map.entries[0].device.table_name); - EXPECT_STREQ("root.sg", map.entries[0].device.table_name); - EXPECT_EQ(2u, map.entries[0].device.segment_count); - ASSERT_NE(nullptr, map.entries[0].device.segments); - EXPECT_STREQ("root.sg", map.entries[0].device.segments[0]); - EXPECT_STREQ("d1", map.entries[0].device.segments[1]); - ASSERT_EQ(1u, map.entries[0].timeseries_count); - ASSERT_NE(nullptr, map.entries[0].timeseries); - TimeseriesMetadata& tm = map.entries[0].timeseries[0]; - ASSERT_STREQ(m_int, tm.measurement_name); - ASSERT_EQ(TS_DATATYPE_INT32, tm.data_type); - TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic); - ASSERT_TRUE(sb->has_statistic); - EXPECT_EQ(3, sb->row_count); - EXPECT_EQ(1, sb->start_time); - EXPECT_EQ(3, sb->end_time); - EXPECT_DOUBLE_EQ(60.0, tm.statistic.u.int_s.sum); - ASSERT_EQ(TS_DATATYPE_INT32, sb->type); - EXPECT_EQ(10, 
tm.statistic.u.int_s.min_int64); - EXPECT_EQ(30, tm.statistic.u.int_s.max_int64); - EXPECT_EQ(10, tm.statistic.u.int_s.first_int64); - EXPECT_EQ(30, tm.statistic.u.int_s.last_int64); - - tsfile_free_device_timeseries_metadata_map(&map); - - DeviceTimeseriesMetadataMap empty{}; - ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_for_devices( - reader, nullptr, 0, &empty)); - EXPECT_EQ(0u, empty.device_count); - EXPECT_EQ(nullptr, empty.entries); - - DeviceID q{}; - q.path = const_cast(device); - q.table_name = nullptr; - q.segment_count = 0; - q.segments = nullptr; - DeviceTimeseriesMetadataMap one{}; - ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_for_devices( - reader, &q, 1, &one)); - ASSERT_EQ(1u, one.device_count); - tsfile_free_device_timeseries_metadata_map(&one); - - ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); - free(m_int); - remove(filename); -} - -TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataBooleanStatistic) { - ERRNO code = RET_OK; - const char* filename = "cwrapper_metadata_bool.tsfile"; - remove(filename); - - const char* device = "root.sg.bool"; - char* m_b = strdup("s_bool"); - timeseries_schema sch{}; - sch.timeseries_name = m_b; - sch.data_type = TS_DATATYPE_BOOLEAN; - sch.encoding = TS_ENCODING_PLAIN; - sch.compression = TS_COMPRESSION_UNCOMPRESSED; - - auto* writer = static_cast( - _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); - ASSERT_EQ(RET_OK, code); - ASSERT_EQ(RET_OK, _tsfile_writer_register_timeseries(writer, device, &sch)); - - const bool vals[] = {true, false, true}; - for (int row = 0; row < 3; row++) { - auto* record = static_cast( - _ts_record_new(device, static_cast(row + 1), 1)); - ASSERT_EQ(RET_OK, _insert_data_into_ts_record_by_name_bool(record, m_b, - vals[row])); - ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); - _free_tsfile_ts_record(reinterpret_cast(&record)); - } - ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); - - TsFileReader reader = tsfile_reader_new(filename, 
&code); - ASSERT_EQ(RET_OK, code); - - DeviceTimeseriesMetadataMap map{}; - ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map)); - TimeseriesMetadata& tm = map.entries[0].timeseries[0]; - ASSERT_STREQ(m_b, tm.measurement_name); - ASSERT_EQ(TS_DATATYPE_BOOLEAN, tm.data_type); - TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic); - ASSERT_TRUE(sb->has_statistic); - EXPECT_DOUBLE_EQ(2.0, tm.statistic.u.bool_s.sum); - ASSERT_EQ(TS_DATATYPE_BOOLEAN, sb->type); - EXPECT_TRUE(tm.statistic.u.bool_s.first_bool); - EXPECT_TRUE(tm.statistic.u.bool_s.last_bool); - - tsfile_free_device_timeseries_metadata_map(&map); - ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); - free(m_b); - remove(filename); -} - -TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataStringStatistic) { - ERRNO code = RET_OK; - const char* filename = "cwrapper_metadata_str.tsfile"; - remove(filename); - - const char* device = "root.sg.str"; - char* m_str = strdup("s_str"); - timeseries_schema sch{}; - sch.timeseries_name = m_str; - sch.data_type = TS_DATATYPE_STRING; - sch.encoding = TS_ENCODING_PLAIN; - sch.compression = TS_COMPRESSION_UNCOMPRESSED; - - auto* writer = static_cast( - _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); - ASSERT_EQ(RET_OK, code); - ASSERT_EQ(RET_OK, _tsfile_writer_register_timeseries(writer, device, &sch)); - - const char* vals[] = {"aa", "cc", "bb"}; - for (int row = 0; row < 3; row++) { - auto* record = static_cast( - _ts_record_new(device, static_cast(row + 1), 1)); - ASSERT_EQ(RET_OK, _insert_data_into_ts_record_by_name_string_with_len( - record, m_str, vals[row], - static_cast(std::strlen(vals[row])))); - ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); - _free_tsfile_ts_record(reinterpret_cast(&record)); - } - ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); - - TsFileReader reader = tsfile_reader_new(filename, &code); - ASSERT_EQ(RET_OK, code); - - DeviceTimeseriesMetadataMap map{}; - ASSERT_EQ(RET_OK, 
tsfile_reader_get_timeseries_metadata_all(reader, &map)); - ASSERT_EQ(1u, map.device_count); - TimeseriesMetadata& tm = map.entries[0].timeseries[0]; - ASSERT_STREQ(m_str, tm.measurement_name); - ASSERT_EQ(TS_DATATYPE_STRING, tm.data_type); - TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic); - ASSERT_TRUE(sb->has_statistic); - ASSERT_EQ(TS_DATATYPE_STRING, sb->type); - ASSERT_NE(nullptr, tm.statistic.u.string_s.str_min); - ASSERT_NE(nullptr, tm.statistic.u.string_s.str_max); - ASSERT_NE(nullptr, tm.statistic.u.string_s.str_first); - ASSERT_NE(nullptr, tm.statistic.u.string_s.str_last); - EXPECT_STREQ("aa", tm.statistic.u.string_s.str_min); - EXPECT_STREQ("cc", tm.statistic.u.string_s.str_max); - EXPECT_STREQ("aa", tm.statistic.u.string_s.str_first); - EXPECT_STREQ("bb", tm.statistic.u.string_s.str_last); - - tsfile_free_device_timeseries_metadata_map(&map); - ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); - free(m_str); - remove(filename); -} - -TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataNullDevicePath) { - ERRNO code = RET_OK; - const char* filename = "cwrapper_metadata_null_path.tsfile"; - remove(filename); - - auto* writer = static_cast( - _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); - ASSERT_EQ(RET_OK, code); - ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); - - TsFileReader reader = tsfile_reader_new(filename, &code); - ASSERT_EQ(RET_OK, code); - - DeviceID bad{}; - bad.path = nullptr; - bad.table_name = nullptr; - bad.segment_count = 0; - bad.segments = nullptr; - DeviceTimeseriesMetadataMap map{}; - EXPECT_EQ(RET_INVALID_ARG, - tsfile_reader_get_timeseries_metadata_for_devices(reader, &bad, 1, - &map)); - - ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); - remove(filename); -} - -TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataInvalidArgs) { - ERRNO code = RET_OK; - const char* filename = "cwrapper_metadata_empty.tsfile"; - remove(filename); - - auto* writer = static_cast( - _tsfile_writer_new(filename, 128 * 1024 * 1024, 
&code)); - ASSERT_EQ(RET_OK, code); - ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); - - TsFileReader reader = tsfile_reader_new(filename, &code); - ASSERT_EQ(RET_OK, code); - - DeviceTimeseriesMetadataMap map{}; - EXPECT_NE(RET_OK, tsfile_reader_get_timeseries_metadata_all(nullptr, &map)); - EXPECT_NE(RET_OK, - tsfile_reader_get_timeseries_metadata_all(reader, nullptr)); - - ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); - remove(filename); -} - -} // namespace cwrapper_metadata diff --git a/cpp/test/cwrapper/cwrapper_test.cc b/cpp/test/cwrapper/cwrapper_test.cc index 9cf06d2f8..5998939af 100644 --- a/cpp/test/cwrapper/cwrapper_test.cc +++ b/cpp/test/cwrapper/cwrapper_test.cc @@ -17,11 +17,7 @@ * under the License. */ #include -#ifdef _WIN32 -#include -#else #include -#endif #include #include diff --git a/cpp/test/cwrapper/query_by_row_cwrapper_test.cc b/cpp/test/cwrapper/query_by_row_cwrapper_test.cc index 3de447ffd..a84625975 100644 --- a/cpp/test/cwrapper/query_by_row_cwrapper_test.cc +++ b/cpp/test/cwrapper/query_by_row_cwrapper_test.cc @@ -215,9 +215,8 @@ TEST_F(CWrapperQueryByRowTest, TableByRowOffsetLimit) { const int offset = 3; const int limit = 5; - ResultSet rs = tsfile_reader_query_table_by_row(reader, table_name.c_str(), - column_names_c, 2, offset, - limit, NULL, 0, &code); + ResultSet rs = tsfile_reader_query_table_by_row( + reader, table_name.c_str(), column_names_c, 2, offset, limit, &code); ASSERT_EQ(code, RET_OK); ASSERT_NE(rs, nullptr); diff --git a/cpp/test/encoding/gorilla_codec_test.cc b/cpp/test/encoding/gorilla_codec_test.cc index 47056a6db..9336d081e 100644 --- a/cpp/test/encoding/gorilla_codec_test.cc +++ b/cpp/test/encoding/gorilla_codec_test.cc @@ -207,4 +207,190 @@ TEST_F(GorillaCodecTest, DoubleEncodingDecodingBoundaryValues) { } } +// ── Batch decode tests (exercises the raw-pointer GorillaBitReader path) ── + +TEST_F(GorillaCodecTest, Int32BatchDecode) { + storage::IntGorillaEncoder encoder; + common::ByteStream stream(1024, 
common::MOD_DEFAULT); + const int N = 500; + int32_t expected[N]; + for (int i = 0; i < N; i++) { + expected[i] = i * 7 - 100; + EXPECT_EQ(encoder.encode(expected[i], stream), common::E_OK); + } + encoder.flush(stream); + + // Copy to a contiguous buffer and wrap (simulates production path) + uint32_t total = stream.total_size(); + std::vector buf(total); + uint32_t got = 0; + stream.read_buf(buf.data(), total, got); + ASSERT_EQ(got, total); + + common::ByteStream wrapped(common::MOD_DEFAULT); + wrapped.wrap_from((const char*)buf.data(), total); + + storage::IntGorillaDecoder decoder; + int32_t out[N]; + int total_decoded = 0; + while (decoder.has_remaining(wrapped) && total_decoded < N) { + int batch = std::min(129, N - total_decoded); + int actual = 0; + EXPECT_EQ(decoder.read_batch_int32(out + total_decoded, batch, actual, + wrapped), + common::E_OK); + if (actual == 0) break; + total_decoded += actual; + } + ASSERT_EQ(total_decoded, N); + for (int i = 0; i < N; i++) { + EXPECT_EQ(out[i], expected[i]) << "mismatch at index " << i; + } +} + +TEST_F(GorillaCodecTest, Int64BatchDecode) { + storage::LongGorillaEncoder encoder; + common::ByteStream stream(1024, common::MOD_DEFAULT); + const int N = 500; + int64_t expected[N]; + for (int i = 0; i < N; i++) { + expected[i] = (int64_t)i * 13 - 200; + EXPECT_EQ(encoder.encode(expected[i], stream), common::E_OK); + } + encoder.flush(stream); + + uint32_t total = stream.total_size(); + std::vector buf(total); + uint32_t got = 0; + stream.read_buf(buf.data(), total, got); + + common::ByteStream wrapped(common::MOD_DEFAULT); + wrapped.wrap_from((const char*)buf.data(), total); + + storage::LongGorillaDecoder decoder; + int64_t out[N]; + int total_decoded = 0; + while (decoder.has_remaining(wrapped) && total_decoded < N) { + int batch = std::min(129, N - total_decoded); + int actual = 0; + EXPECT_EQ(decoder.read_batch_int64(out + total_decoded, batch, actual, + wrapped), + common::E_OK); + if (actual == 0) break; + 
total_decoded += actual; + } + ASSERT_EQ(total_decoded, N); + for (int i = 0; i < N; i++) { + EXPECT_EQ(out[i], expected[i]) << "mismatch at index " << i; + } +} + +TEST_F(GorillaCodecTest, FloatBatchDecode) { + storage::FloatGorillaEncoder encoder; + common::ByteStream stream(1024, common::MOD_DEFAULT); + const int N = 300; + std::vector expected(N); + for (int i = 0; i < N; i++) { + expected[i] = (float)i * 1.5f - 50.0f; + EXPECT_EQ(encoder.encode(expected[i], stream), common::E_OK); + } + encoder.flush(stream); + + uint32_t total = stream.total_size(); + std::vector buf(total); + uint32_t got = 0; + stream.read_buf(buf.data(), total, got); + + common::ByteStream wrapped(common::MOD_DEFAULT); + wrapped.wrap_from((const char*)buf.data(), total); + + storage::FloatGorillaDecoder decoder; + std::vector out(N); + int total_decoded = 0; + while (decoder.has_remaining(wrapped) && total_decoded < N) { + int batch = std::min(129, N - total_decoded); + int actual = 0; + EXPECT_EQ(decoder.read_batch_float(out.data() + total_decoded, batch, + actual, wrapped), + common::E_OK); + if (actual == 0) break; + total_decoded += actual; + } + ASSERT_EQ(total_decoded, N); + for (int i = 0; i < N; i++) { + EXPECT_FLOAT_EQ(out[i], expected[i]) << "mismatch at index " << i; + } +} + +TEST_F(GorillaCodecTest, DoubleBatchDecode) { + storage::DoubleGorillaEncoder encoder; + common::ByteStream stream(1024, common::MOD_DEFAULT); + const int N = 300; + std::vector expected(N); + for (int i = 0; i < N; i++) { + expected[i] = (double)i * 2.7 - 100.0; + EXPECT_EQ(encoder.encode(expected[i], stream), common::E_OK); + } + encoder.flush(stream); + + uint32_t total = stream.total_size(); + std::vector buf(total); + uint32_t got = 0; + stream.read_buf(buf.data(), total, got); + + common::ByteStream wrapped(common::MOD_DEFAULT); + wrapped.wrap_from((const char*)buf.data(), total); + + storage::DoubleGorillaDecoder decoder; + std::vector out(N); + int total_decoded = 0; + while 
(decoder.has_remaining(wrapped) && total_decoded < N) { + int batch = std::min(129, N - total_decoded); + int actual = 0; + EXPECT_EQ(decoder.read_batch_double(out.data() + total_decoded, batch, + actual, wrapped), + common::E_OK); + if (actual == 0) break; + total_decoded += actual; + } + ASSERT_EQ(total_decoded, N); + for (int i = 0; i < N; i++) { + EXPECT_DOUBLE_EQ(out[i], expected[i]) << "mismatch at index " << i; + } +} + +TEST_F(GorillaCodecTest, Int32BatchSkip) { + storage::IntGorillaEncoder encoder; + common::ByteStream stream(1024, common::MOD_DEFAULT); + const int N = 200; + int32_t expected[N]; + for (int i = 0; i < N; i++) { + expected[i] = i * 3; + EXPECT_EQ(encoder.encode(expected[i], stream), common::E_OK); + } + encoder.flush(stream); + + uint32_t total = stream.total_size(); + std::vector buf(total); + uint32_t got = 0; + stream.read_buf(buf.data(), total, got); + + common::ByteStream wrapped(common::MOD_DEFAULT); + wrapped.wrap_from((const char*)buf.data(), total); + + storage::IntGorillaDecoder decoder; + // Skip first 50 values + int skipped = 0; + EXPECT_EQ(decoder.skip_int32(50, skipped, wrapped), common::E_OK); + EXPECT_EQ(skipped, 50); + // Read next 50 values + int32_t out[50]; + int actual = 0; + EXPECT_EQ(decoder.read_batch_int32(out, 50, actual, wrapped), common::E_OK); + EXPECT_EQ(actual, 50); + for (int i = 0; i < 50; i++) { + EXPECT_EQ(out[i], expected[50 + i]) << "mismatch at index " << i; + } +} + } // namespace storage diff --git a/cpp/test/encoding/int32_rle_codec_test.cc b/cpp/test/encoding/int32_rle_codec_test.cc index dfc737c8b..c580a0eb1 100644 --- a/cpp/test/encoding/int32_rle_codec_test.cc +++ b/cpp/test/encoding/int32_rle_codec_test.cc @@ -164,133 +164,4 @@ TEST_F(Int32RleEncoderTest, EncodeFlushWithoutData) { EXPECT_EQ(stream.total_size(), 0u); } -// Helper: write a manually crafted RLE segment (Java/Parquet hybrid RLE -// format): -// [length_varint] [bit_width] [group_header_varint] [value_bytes...] 
-// run_count must be the actual count (written as (run_count<<1)|0 varint). -static void write_rle_segment(common::ByteStream& stream, uint8_t bit_width, - uint32_t run_count, int32_t value) { - common::ByteStream content(32, common::MOD_ENCODER_OBJ); - common::SerializationUtil::write_ui8(bit_width, content); - // Group header: (run_count << 1) | 0 = even varint - common::SerializationUtil::write_var_uint(run_count << 1, content); - // Value: ceil(bit_width / 8) bytes, little-endian - int byte_width = (bit_width + 7) / 8; - uint32_t uvalue = static_cast(value); - for (int i = 0; i < byte_width; i++) { - common::SerializationUtil::write_ui8((uvalue >> (i * 8)) & 0xFF, - content); - } - uint32_t length = content.total_size(); - common::SerializationUtil::write_var_uint(length, stream); - // Append content bytes to stream - uint8_t buf[64]; - uint32_t read_len = 0; - content.read_buf(buf, length, read_len); - stream.write_buf(buf, read_len); -} - -// Regression test: run_count=64 requires a 2-byte LEB128 varint header -// ((64<<1)|0 = 128 = [0x80, 0x01]). Before the fix, only 1 byte was read, -// causing byte misalignment and incorrect decoding. -TEST_F(Int32RleEncoderTest, DecodeRleRunCountExactly64) { - common::ByteStream stream(32, common::MOD_ENCODER_OBJ); - write_rle_segment(stream, /*bit_width=*/7, /*run_count=*/64, - /*value=*/42); - - Int32RleDecoder decoder; - std::vector decoded; - while (decoder.has_next(stream)) { - int32_t v; - decoder.read_int32(v, stream); - decoded.push_back(v); - } - - ASSERT_EQ(decoded.size(), 64u); - for (int32_t v : decoded) { - EXPECT_EQ(v, 42); - } -} - -// Run counts of 128 and 256 each need a 2-byte varint header. 
-TEST_F(Int32RleEncoderTest, DecodeRleRunCountLarge) { - for (uint32_t count : {128u, 256u, 500u}) { - common::ByteStream stream(64, common::MOD_ENCODER_OBJ); - write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/count, - /*value=*/100); - - Int32RleDecoder decoder; - std::vector decoded; - while (decoder.has_next(stream)) { - int32_t v; - decoder.read_int32(v, stream); - decoded.push_back(v); - } - - ASSERT_EQ(decoded.size(), (size_t)count) - << "Failed for run_count=" << count; - for (int32_t v : decoded) { - EXPECT_EQ(v, 100); - } - } -} - -// Multiple consecutive RLE runs including large ones (simulates real sensor -// data with repeated values and occasional changes). -TEST_F(Int32RleEncoderTest, DecodeMultipleRleRunsWithLargeCount) { - common::ByteStream stream(128, common::MOD_ENCODER_OBJ); - write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/64, - /*value=*/25); - write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/8, - /*value=*/26); - write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/100, - /*value=*/25); - - Int32RleDecoder decoder; - std::vector decoded; - while (decoder.has_next(stream)) { - int32_t v; - decoder.read_int32(v, stream); - decoded.push_back(v); - } - - ASSERT_EQ(decoded.size(), 172u); // 64 + 8 + 100 - for (size_t i = 0; i < 64; i++) EXPECT_EQ(decoded[i], 25); - for (size_t i = 64; i < 72; i++) EXPECT_EQ(decoded[i], 26); - for (size_t i = 72; i < 172; i++) EXPECT_EQ(decoded[i], 25); -} - -// Regression test: Int32RleDecoder::reset() previously called delete[] on -// current_buffer_ which was allocated with mem_alloc (malloc). This is -// undefined behaviour and typically causes a crash. The fix uses mem_free. 
-TEST_F(Int32RleEncoderTest, ResetAfterDecodeNoCrash) { - common::ByteStream stream(1024, common::MOD_ENCODER_OBJ); - Int32RleEncoder encoder; - for (int i = 0; i < 16; i++) encoder.encode(i, stream); - encoder.flush(stream); - - Int32RleDecoder decoder; - // Decode at least one value to populate current_buffer_ via mem_alloc. - int32_t v; - ASSERT_TRUE(decoder.has_next(stream)); - decoder.read_int32(v, stream); - - // reset() must use mem_free, not delete[]. Before the fix this would crash. - decoder.reset(); - - // Verify the decoder is functional after reset. - common::ByteStream stream2(1024, common::MOD_ENCODER_OBJ); - Int32RleEncoder encoder2; - std::vector input = {7, 7, 7, 7, 7, 7, 7, 7}; - for (int32_t x : input) encoder2.encode(x, stream2); - encoder2.flush(stream2); - - std::vector decoded; - while (decoder.has_next(stream2)) { - decoder.read_int32(v, stream2); - decoded.push_back(v); - } - ASSERT_EQ(decoded, input); -} - } // namespace storage diff --git a/cpp/test/file/restorable_tsfile_io_writer_test.cc b/cpp/test/file/restorable_tsfile_io_writer_test.cc index 8f723e056..655995d35 100644 --- a/cpp/test/file/restorable_tsfile_io_writer_test.cc +++ b/cpp/test/file/restorable_tsfile_io_writer_test.cc @@ -44,7 +44,6 @@ namespace storage { class ResultSet; } - using namespace storage; using namespace common; @@ -354,92 +353,6 @@ TEST_F(RestorableTsFileIOWriterTest, MultiDeviceRecoverAndWriteWithTreeWriter) { reader.close(); } -TEST_F(RestorableTsFileIOWriterTest, - MultiDeviceRecoverAndWriteWithTreeWriterMultipleTimes) { - TsFileWriter tw; - ASSERT_EQ(tw.open(file_name_, GetWriteCreateFlags(), 0666), E_OK); - tw.register_timeseries("d1", MeasurementSchema("s1", FLOAT)); - tw.register_timeseries("d1", MeasurementSchema("s2", INT32)); - tw.register_timeseries("d2", MeasurementSchema("s1", FLOAT)); - tw.register_timeseries("d2", MeasurementSchema("s2", DOUBLE)); - - TsRecord r1(1, "d1"); - r1.add_point("s1", 1.0f); - r1.add_point("s2", 10); - 
ASSERT_EQ(tw.write_record(r1), E_OK); - TsRecord r2(2, "d2"); - r2.add_point("s1", 2.0f); - r2.add_point("s2", 20.0); - ASSERT_EQ(tw.write_record(r2), E_OK); - tw.flush(); - tw.close(); - - for (int i = 0; i < 3; ++i) { - CorruptCurrentFileTail(3 + i); - - RestorableTsFileIOWriter rw; - ASSERT_EQ(rw.open(file_name_, true), E_OK); - ASSERT_TRUE(rw.can_write()); - ASSERT_TRUE(rw.has_crashed()); - ASSERT_GE(rw.get_truncated_size(), - static_cast(MAGIC_STRING_TSFILE_LEN + 1)); - - TsFileTreeWriter tree_writer(&rw); - TsRecord r3(3 + 2 * i, "d1"); - r3.add_point("s1", static_cast(3 + 2 * i)); - r3.add_point("s2", 30 + 20 * i); - ASSERT_EQ(tree_writer.write(r3), E_OK); - TsRecord r4(4 + 2 * i, "d2"); - r4.add_point("s1", static_cast(4 + 2 * i)); - r4.add_point("s2", 40.0 + 20.0 * i); - ASSERT_EQ(tree_writer.write(r4), E_OK); - ASSERT_EQ(tree_writer.flush(), E_OK); - ASSERT_EQ(tree_writer.close(), E_OK); - } - - TsFileTreeReader reader; - ASSERT_EQ(reader.open(file_name_), E_OK); - ASSERT_EQ(reader.get_all_device_ids().size(), 2u); - // Multi-round corruption/recovery should keep the file readable. 
- ASSERT_EQ(CountTreeReaderRows(reader, {"s1", "s2"}), 4); - reader.close(); -} - -TEST_F(RestorableTsFileIOWriterTest, - TreeWriterRepeatedWriteAfterRecoveryShouldRejectDuplicateTimestamps) { - TsFileWriter tw; - ASSERT_EQ(tw.open(file_name_, GetWriteCreateFlags(), 0666), E_OK); - tw.register_timeseries( - "root.d1", - MeasurementSchema("s1", FLOAT, GORILLA, CompressionType::UNCOMPRESSED)); - TsRecord record(1, "root.d1"); - record.add_point("s1", 1.0f); - ASSERT_EQ(tw.write_record(record), E_OK); - record.timestamp_ = 2; - ASSERT_EQ(tw.write_record(record), E_OK); - tw.flush(); - tw.close(); - - for (int round = 0; round < 2; ++round) { - CorruptCurrentFileTail(3); - - RestorableTsFileIOWriter rw; - ASSERT_EQ(rw.open(file_name_, true), E_OK); - ASSERT_TRUE(rw.can_write()); - - TsFileTreeWriter tree_writer(&rw); - TsRecord record2(3, "root.d1"); - record2.add_point("s1", 3.0f); - if (round == 0) { - ASSERT_EQ(tree_writer.write(record2), E_OK); - ASSERT_EQ(tree_writer.flush(), E_OK); - } else { - ASSERT_EQ(tree_writer.write(record2), E_OUT_OF_ORDER); - } - ASSERT_EQ(tree_writer.close(), E_OK); - } -} - // ----------------------------------------------------------------------------- // Tree model + Recovery + continued write with aligned timeseries, then // read-back verify @@ -582,416 +495,3 @@ TEST_F(RestorableTsFileIOWriterTest, TableWriterRecoverAndWrite) { table_reader.destroy_query_data_set(tmp_result_set); table_reader.close(); } - -TEST_F(RestorableTsFileIOWriterTest, TableWriterRecoverAndWrite1) { - using namespace std; - string table_name = "test_table"; - vector column_names = {"t1", "f1", "f2", "f3", "f4", "f5", - "f6", "f7", "f8", "f9", "f10"}; - vector data_types = {STRING, BOOLEAN, INT32, INT64, - FLOAT, DOUBLE, TEXT, STRING, - BLOB, DATE, TIMESTAMP}; - std::vector column_schemas; - for (int i = 0; i < column_names.size(); i++) { - column_schemas.push_back( - new MeasurementSchema(column_names[i], data_types[i])); - } - std::vector column_categories = 
{ - ColumnCategory::TAG, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD}; - TableSchema table_schema(table_name, column_schemas, column_categories); - - WriteFile write_file; - write_file.create(file_name_, GetWriteCreateFlags(), 0666); - TsFileTableWriter table_writer(&write_file, &table_schema); - uint32_t max_rows = 10; - Tablet tablet(table_schema.get_measurement_names(), - table_schema.get_data_types(), max_rows); - tablet.set_table_name(table_name); - for (int row = 0; row < max_rows; row++) { - ASSERT_EQ(tablet.add_timestamp(row, static_cast(row)), E_OK); - if (row % 2 == 0) { - ASSERT_EQ(tablet.add_value(row, column_names[0], "device0"), E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[1], row % 2 == 0), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[2], - static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[3], - static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[4], - static_cast(row * 1.1)), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[5], - static_cast(row * 1.1)), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[6], - ("text" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[7], - ("string" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[8], - ("blob" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[9], - static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, column_names[10], - static_cast(row)), - E_OK); - } - } - ASSERT_EQ(table_writer.write_table(tablet), E_OK); - ASSERT_EQ(table_writer.flush(), E_OK); - ASSERT_EQ(table_writer.close(), E_OK); - ASSERT_EQ(write_file.close(), E_OK); - - CorruptCurrentFileTail(10); - RestorableTsFileIOWriter rw; - ASSERT_EQ(rw.open(file_name_, 
true), E_OK); - ASSERT_TRUE(rw.can_write()); - - TsFileTableWriter table_writer2(&rw); - vector column_names2 = {"__level1", "f1", "f2", "f3", "f4", "f5", - "f6", "f7", "f8", "f9", "f10"}; - vector data_types2 = {STRING, BOOLEAN, INT32, INT64, - FLOAT, DOUBLE, TEXT, STRING, - BLOB, DATE, TIMESTAMP}; - uint32_t max_rows2 = 10; - Tablet tablet2(column_names2, data_types2, max_rows2); - tablet2.set_table_name(table_name); - for (int row = 0; row < max_rows; row++) { - ASSERT_EQ( - tablet2.add_timestamp(row, static_cast(row + max_rows)), - E_OK); - if (row % 2 == 0) { - ASSERT_EQ(tablet2.add_value(row, column_names2[0], "device1"), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[1], row % 2 == 0), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[2], - static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[3], - static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[4], - static_cast(row * 1.1)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[5], - static_cast(row * 1.1)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[6], - ("text" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[7], - ("string" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[8], - ("blob" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[9], - static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, column_names2[10], - static_cast(row)), - E_OK); - } - } - ASSERT_EQ(table_writer2.write_table(tablet2), E_OK); - ASSERT_EQ(table_writer2.flush(), E_OK); - ASSERT_EQ(table_writer2.close(), E_OK); - - TsFileReader table_reader; - ASSERT_EQ(table_reader.open(file_name_), E_OK); - DeviceTimeseriesMetadataMap metadata = - table_reader.get_timeseries_metadata(); - ASSERT_EQ(metadata.size(), 3u); - - storage::ResultSet* temp_ret = nullptr; - ASSERT_EQ(table_reader.query(table_name, column_names2, 0, 100, 
temp_ret), - E_OK); - auto* table_result_set = dynamic_cast(temp_ret); - ASSERT_NE(table_result_set, nullptr); - bool has_next = false; - int64_t row_num = 0; - while (IS_SUCC(table_result_set->next(has_next)) && has_next) { - (void)table_result_set->get_row_record(); - row_num++; - } - // 两次写入各 10 行:奇数行仅时间(null 设备)+ 偶数行带 device,共 20 - // 行可查 - ASSERT_EQ(row_num, 20); - table_result_set->close(); - table_reader.destroy_query_data_set(temp_ret); - table_reader.close(); -} - -TEST_F(RestorableTsFileIOWriterTest, - TableWriterRecoverAndWriteNullTagFloatDoubleStatistics) { - using namespace std; - const string table_name = "test_table"; - vector column_names = {"t1", "t2", "t3", "f1", "f2", "f3", "f4", - "f5", "f6", "f7", "f8", "f9", "f10"}; - vector data_types = {STRING, STRING, STRING, BOOLEAN, INT32, - INT64, FLOAT, DOUBLE, TEXT, STRING, - BLOB, DATE, TIMESTAMP}; - std::vector column_schemas; - for (size_t i = 0; i < column_names.size(); i++) { - column_schemas.push_back( - new MeasurementSchema(column_names[i], data_types[i])); - } - std::vector column_categories = { - ColumnCategory::TAG, ColumnCategory::TAG, ColumnCategory::TAG, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD}; - TableSchema table_schema(table_name, column_schemas, column_categories); - - WriteFile write_file; - ASSERT_EQ(write_file.create(file_name_, GetWriteCreateFlags(), 0666), E_OK); - TsFileTableWriter table_writer(&write_file, &table_schema); - constexpr uint32_t max_rows = 10; - Tablet tablet(table_schema.get_measurement_names(), - table_schema.get_data_types(), max_rows); - tablet.set_table_name(table_name); - for (int row = 0; row < static_cast(max_rows); row++) { - ASSERT_EQ(tablet.add_timestamp(row, static_cast(row)), E_OK); - if (row % 2 == 0) { - ASSERT_EQ(tablet.add_value(row, "t1", "device1"), 
E_OK); - ASSERT_EQ(tablet.add_value(row, "t2", "device2"), E_OK); - ASSERT_EQ(tablet.add_value(row, "t3", "device3"), E_OK); - ASSERT_EQ(tablet.add_value(row, "f1", row % 2 == 0), E_OK); - ASSERT_EQ(tablet.add_value(row, "f2", static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "f3", static_cast(row)), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f4", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f5", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f6", ("text" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet.add_value(row, "f7", - ("string" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f8", ("blob" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet.add_value(row, "f9", static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "f10", static_cast(row)), - E_OK); - } - } - ASSERT_EQ(table_writer.write_table(tablet), E_OK); - ASSERT_EQ(table_writer.flush(), E_OK); - ASSERT_EQ(table_writer.close(), E_OK); - ASSERT_EQ(write_file.close(), E_OK); - - CorruptCurrentFileTail(10); - - RestorableTsFileIOWriter rw; - ASSERT_EQ(rw.open(file_name_, true), E_OK); - ASSERT_TRUE(rw.can_write()); - - TsFileTableWriter table_writer2(&rw); - vector column_names2 = { - "__level1", "__level2", "__level3", "f1", "f2", "f3", "f4", - "f5", "f6", "f7", "f8", "f9", "f10"}; - Tablet tablet2(column_names2, data_types, max_rows); - tablet2.set_table_name(table_name); - for (int row = 0; row < static_cast(max_rows); row++) { - ASSERT_EQ( - tablet2.add_timestamp(row, static_cast(row + max_rows)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "__level1", "device1"), E_OK); - ASSERT_EQ(tablet2.add_value(row, "__level2", "device2"), E_OK); - ASSERT_EQ(tablet2.add_value(row, "__level3", "device3"), E_OK); - ASSERT_EQ(tablet2.add_value(row, "f1", row % 2 == 0), E_OK); - ASSERT_EQ(tablet2.add_value(row, "f2", static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f3", static_cast(row)), - 
E_OK); - ASSERT_EQ(tablet2.add_value(row, "f4", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f5", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f6", ("text" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f7", ("string" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f8", ("blob" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f9", static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f10", static_cast(row)), - E_OK); - } - ASSERT_EQ(table_writer2.write_table(tablet2), E_OK); - ASSERT_EQ(table_writer2.flush(), E_OK); - ASSERT_EQ(table_writer2.close(), E_OK); - - TsFileReader table_reader; - ASSERT_EQ(table_reader.open(file_name_), E_OK); - DeviceTimeseriesMetadataMap metadata = - table_reader.get_timeseries_metadata(); - - bool checked_null_tag_group = false; - for (const auto& entry : metadata) { - const auto& device_id = entry.first; - if (device_id == nullptr) { - continue; - } - const std::string device_name = device_id->get_device_name(); - if (device_name.find("null.null.null") == std::string::npos) { - continue; - } - bool checked_f4 = false; - bool checked_f5 = false; - for (const auto& field : entry.second) { - const auto field_name = - field->get_measurement_name().to_std_string(); - if (field_name == "f4" || field_name == "f5") { - ASSERT_NE(field->get_statistic(), nullptr); - EXPECT_EQ(field->get_statistic()->count_, 0); - EXPECT_EQ(field->get_statistic()->start_time_, 0); - EXPECT_EQ(field->get_statistic()->end_time_, 0); - if (field_name == "f4") { - checked_f4 = true; - } else { - checked_f5 = true; - } - } - } - EXPECT_TRUE(checked_f4); - EXPECT_TRUE(checked_f5); - checked_null_tag_group = true; - } - EXPECT_TRUE(checked_null_tag_group); - table_reader.close(); -} - -TEST_F(RestorableTsFileIOWriterTest, - TableWriterRepeatedWriteAfterRecoveryShouldRejectDuplicateTimestamps) { - using namespace std; - const 
string table_name = "test_table"; - vector column_names = {"t1", "t2", "t3", "f1", "f2", "f3", "f4", - "f5", "f6", "f7", "f8", "f9", "f10"}; - vector data_types = {STRING, STRING, STRING, BOOLEAN, INT32, - INT64, FLOAT, DOUBLE, TEXT, STRING, - BLOB, DATE, TIMESTAMP}; - std::vector column_schemas; - for (size_t i = 0; i < column_names.size(); i++) { - column_schemas.push_back( - new MeasurementSchema(column_names[i], data_types[i])); - } - std::vector column_categories = { - ColumnCategory::TAG, ColumnCategory::TAG, ColumnCategory::TAG, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD, ColumnCategory::FIELD, - ColumnCategory::FIELD}; - TableSchema table_schema(table_name, column_schemas, column_categories); - - WriteFile write_file; - ASSERT_EQ(write_file.create(file_name_, GetWriteCreateFlags(), 0666), E_OK); - TsFileTableWriter table_writer(&write_file, &table_schema); - constexpr uint32_t max_rows = 10; - Tablet tablet(table_schema.get_measurement_names(), - table_schema.get_data_types(), max_rows); - tablet.set_table_name(table_name); - for (int row = 0; row < static_cast(max_rows); row++) { - ASSERT_EQ(tablet.add_timestamp(row, static_cast(row)), E_OK); - ASSERT_EQ(tablet.add_value(row, "t1", "device1"), E_OK); - ASSERT_EQ(tablet.add_value(row, "t2", "device2"), E_OK); - ASSERT_EQ(tablet.add_value(row, "t3", "device3"), E_OK); - ASSERT_EQ(tablet.add_value(row, "f1", row % 2 == 0), E_OK); - ASSERT_EQ(tablet.add_value(row, "f2", static_cast(row)), E_OK); - ASSERT_EQ(tablet.add_value(row, "f3", static_cast(row)), E_OK); - ASSERT_EQ(tablet.add_value(row, "f4", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "f5", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f6", ("text" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f7", ("string" + 
to_string(row)).c_str()), - E_OK); - ASSERT_EQ( - tablet.add_value(row, "f8", ("blob" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet.add_value(row, "f9", static_cast(row)), E_OK); - ASSERT_EQ(tablet.add_value(row, "f10", static_cast(row)), - E_OK); - } - ASSERT_EQ(table_writer.write_table(tablet), E_OK); - ASSERT_EQ(table_writer.flush(), E_OK); - ASSERT_EQ(table_writer.close(), E_OK); - ASSERT_EQ(write_file.close(), E_OK); - - vector recovered_column_names = { - "__level1", "__level2", "__level3", "f1", "f2", "f3", "f4", - "f5", "f6", "f7", "f8", "f9", "f10"}; - for (int round = 0; round < 2; ++round) { - CorruptCurrentFileTail(10); - RestorableTsFileIOWriter rw; - ASSERT_EQ(rw.open(file_name_, true), E_OK); - ASSERT_TRUE(rw.can_write()); - - TsFileTableWriter table_writer2(&rw); - Tablet tablet2(recovered_column_names, data_types, max_rows); - tablet2.set_table_name(table_name); - for (int row = 0; row < static_cast(max_rows); row++) { - ASSERT_EQ( - tablet2.add_timestamp(row, static_cast(row + 10)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "__level1", "device1"), E_OK); - ASSERT_EQ(tablet2.add_value(row, "__level2", "device2"), E_OK); - ASSERT_EQ(tablet2.add_value(row, "__level3", "device3"), E_OK); - ASSERT_EQ(tablet2.add_value(row, "f1", row % 2 == 0), E_OK); - ASSERT_EQ(tablet2.add_value(row, "f2", static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f3", static_cast(row)), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f4", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f5", static_cast(row * 1.1)), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f6", ("text" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f7", - ("string" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ( - tablet2.add_value(row, "f8", ("blob" + to_string(row)).c_str()), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f9", static_cast(row)), - E_OK); - ASSERT_EQ(tablet2.add_value(row, "f10", static_cast(row)), - E_OK); - 
} - if (round == 0) { - ASSERT_EQ(table_writer2.write_table(tablet2), E_OK); - ASSERT_EQ(table_writer2.flush(), E_OK); - } else { - ASSERT_EQ(table_writer2.write_table(tablet2), E_OUT_OF_ORDER); - } - ASSERT_EQ(table_writer2.close(), E_OK); - } -} \ No newline at end of file diff --git a/cpp/test/reader/query_by_row_performance_test.cc b/cpp/test/reader/query_by_row_performance_test.cc index 4caf26f71..2154c73ba 100644 --- a/cpp/test/reader/query_by_row_performance_test.cc +++ b/cpp/test/reader/query_by_row_performance_test.cc @@ -86,7 +86,8 @@ static int query_by_row_perf_iters() { return n; } -static int compute_offset_with_env(int num_rows, int default_offset) { +[[maybe_unused]] static int compute_offset_with_env(int num_rows, + int default_offset) { int offset = default_offset; int abs = 0; if (get_env_int("QUERY_BY_ROW_PERF_OFFSET", abs)) { @@ -115,7 +116,7 @@ static void write_result_if_needed(const std::string& md) { } // Entire suite skipped in default runs -class DISABLED_QueryByRowPerformanceTest : public ::testing::Test { +class QueryByRowPerformanceTest : public ::testing::Test { protected: void SetUp() override { libtsfile_init(); @@ -319,7 +320,7 @@ static void compute_avg_times(RunByRowFn&& run_by_row, RunManualFn&& run_manual, avg_manual = (valid_iters > 0) ? 
(sum_manual / valid_iters) : -1.0; } -TEST_F(DISABLED_QueryByRowPerformanceTest, TreeModel_SingleSequence) { +TEST_F(QueryByRowPerformanceTest, TreeModel_SingleSequence) { const std::vector measurement_ids = {"s1"}; write_tree_multi_device_file(kNumRowsTotal, kDeviceCount, measurement_ids, kNoneProbSingle, /*none_prob_s2=*/0.0, 123); @@ -415,7 +416,7 @@ TEST_F(DISABLED_QueryByRowPerformanceTest, TreeModel_SingleSequence) { EXPECT_GT(best_speedup, 1.0); } -TEST_F(DISABLED_QueryByRowPerformanceTest, TreeModel_MultiSequence) { +TEST_F(QueryByRowPerformanceTest, TreeModel_MultiSequence) { const std::vector measurement_ids = {"s1", "s2"}; write_tree_multi_device_file(kNumRowsTotal, kDeviceCount, measurement_ids, kNoneProbMultiS1, kNoneProbS2, 456); @@ -513,7 +514,7 @@ TEST_F(DISABLED_QueryByRowPerformanceTest, TreeModel_MultiSequence) { EXPECT_GT(best_speedup, 1.0); } -TEST_F(DISABLED_QueryByRowPerformanceTest, TableModel_SingleSequence) { +TEST_F(QueryByRowPerformanceTest, TableModel_SingleSequence) { write_table_multi_device_file(kNumRowsTotal, kDeviceCount, kNoneProbSingle, 0.0, 789); const std::vector cols = {"id1", "s1"}; @@ -609,7 +610,7 @@ TEST_F(DISABLED_QueryByRowPerformanceTest, TableModel_SingleSequence) { EXPECT_GT(best_speedup, 1.0); } -TEST_F(DISABLED_QueryByRowPerformanceTest, TableModel_MultiSequence) { +TEST_F(QueryByRowPerformanceTest, TableModel_MultiSequence) { write_table_multi_device_file(kNumRowsTotal, kDeviceCount, kNoneProbMultiS1, kNoneProbS2, 101); const std::vector cols = {"id1", "s1", "s2"}; diff --git a/cpp/test/reader/table_view/tsfile_reader_table_batch_test.cc b/cpp/test/reader/table_view/tsfile_reader_table_batch_test.cc index e115552ec..6e2da1c40 100644 --- a/cpp/test/reader/table_view/tsfile_reader_table_batch_test.cc +++ b/cpp/test/reader/table_view/tsfile_reader_table_batch_test.cc @@ -133,6 +133,25 @@ class TsFileTableReaderBatchTest : public ::testing::Test { column_categories); } + static TableSchema* 
gen_table_schema_with_string_field() { + std::vector measurement_schemas; + std::vector column_categories; + measurement_schemas.emplace_back( + new MeasurementSchema("id0", TSDataType::STRING, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)); + column_categories.emplace_back(ColumnCategory::TAG); + measurement_schemas.emplace_back(new MeasurementSchema( + "s_text", TSDataType::STRING, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)); + column_categories.emplace_back(ColumnCategory::FIELD); + measurement_schemas.emplace_back( + new MeasurementSchema("s_num", TSDataType::INT64, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)); + column_categories.emplace_back(ColumnCategory::FIELD); + return new TableSchema("testTableString", measurement_schemas, + column_categories); + } + static storage::Tablet gen_tablet(TableSchema* table_schema, int offset, int device_num, int num_timestamp_per_device = 10) { @@ -171,6 +190,121 @@ class TsFileTableReaderBatchTest : public ::testing::Test { delete[] literal; return tablet; } + + static storage::Tablet gen_tablet_with_string_field( + TableSchema* table_schema, int num_rows) { + storage::Tablet tablet(table_schema->get_table_name(), + table_schema->get_measurement_names(), + table_schema->get_data_types(), + table_schema->get_column_categories(), num_rows); + for (int i = 0; i < num_rows; i++) { + tablet.add_timestamp(i, i); + tablet.add_value(i, "id0", "device_a"); + tablet.add_value(i, "s_text", "value_" + std::to_string(i)); + tablet.add_value(i, "s_num", static_cast(i * 10)); + } + return tablet; + } + + std::vector query_timestamps_in_batches(TableSchema* table_schema, + int64_t start_time, + int64_t end_time, + int batch_size) { + storage::TsFileReader reader; + int ret = reader.open(file_name_); + EXPECT_EQ(ret, common::E_OK); + + ResultSet* tmp_result_set = nullptr; + ret = reader.query(table_schema->get_table_name(), + table_schema->get_measurement_names(), start_time, + end_time, tmp_result_set, batch_size); 
+ EXPECT_EQ(ret, common::E_OK); + EXPECT_NE(tmp_result_set, nullptr); + + auto* table_result_set = dynamic_cast(tmp_result_set); + EXPECT_NE(table_result_set, nullptr); + + std::vector timestamps; + common::TsBlock* block = nullptr; + while ((ret = table_result_set->get_next_tsblock(block)) == + common::E_OK) { + if (block == nullptr) { + ADD_FAILURE() << "Expected non-null TsBlock"; + break; + } + common::RowIterator row_iterator(block); + while (row_iterator.has_next()) { + uint32_t len = 0; + bool null = false; + int64_t timestamp = *reinterpret_cast( + row_iterator.read(0, &len, &null)); + EXPECT_FALSE(null); + timestamps.push_back(timestamp); + + for (uint32_t col_idx = 1; + col_idx < row_iterator.get_column_count(); ++col_idx) { + const char* value = row_iterator.read(col_idx, &len, &null); + EXPECT_FALSE(null); + if (row_iterator.get_data_type(col_idx) == + TSDataType::INT64) { + int64_t int_val = + *reinterpret_cast(value); + EXPECT_EQ(int_val, 0); + } + } + row_iterator.next(); + } + } + + reader.destroy_query_data_set(table_result_set); + EXPECT_EQ(reader.close(), common::E_OK); + return timestamps; + } + + std::vector> query_string_field_in_batches( + TableSchema* table_schema, int64_t start_time, int64_t end_time, + int batch_size) { + storage::TsFileReader reader; + int ret = reader.open(file_name_); + EXPECT_EQ(ret, common::E_OK); + + ResultSet* tmp_result_set = nullptr; + ret = reader.query(table_schema->get_table_name(), + table_schema->get_measurement_names(), start_time, + end_time, tmp_result_set, batch_size); + EXPECT_EQ(ret, common::E_OK); + EXPECT_NE(tmp_result_set, nullptr); + + auto* table_result_set = dynamic_cast(tmp_result_set); + EXPECT_NE(table_result_set, nullptr); + + std::vector> result; + common::TsBlock* block = nullptr; + while ((ret = table_result_set->get_next_tsblock(block)) == + common::E_OK) { + if (block == nullptr) { + ADD_FAILURE() << "Expected non-null TsBlock"; + break; + } + common::RowIterator row_iterator(block); + 
while (row_iterator.has_next()) { + uint32_t len = 0; + bool null = false; + int64_t timestamp = *reinterpret_cast( + row_iterator.read(0, &len, &null)); + EXPECT_FALSE(null); + + const char* value = row_iterator.read(2, &len, &null); + EXPECT_FALSE(null); + result.emplace_back(timestamp, std::string(value, len)); + row_iterator.next(); + } + } + + reader.destroy_query_data_set(table_result_set); + EXPECT_EQ(reader.close(), common::E_OK); + return result; + } }; TEST_F(TsFileTableReaderBatchTest, BatchQueryWithSmallBatchSize) { @@ -361,6 +495,89 @@ TEST_F(TsFileTableReaderBatchTest, BatchQueryVerifyDataCorrectness) { delete table_schema; } +TEST_F(TsFileTableReaderBatchTest, + BatchQueryKeepsStateAcrossTsBlocksWithinPage) { + auto table_schema = gen_table_schema(); + auto tsfile_table_writer_ = + std::make_shared(&write_file_, table_schema); + + const int prev_page_point_num = g_config_value_.page_writer_max_point_num_; + g_config_value_.page_writer_max_point_num_ = 128; + + const int device_num = 1; + const int points_per_device = 35; + auto tablet = gen_tablet(table_schema, 0, device_num, points_per_device); + ASSERT_EQ(tsfile_table_writer_->write_table(tablet), common::E_OK); + ASSERT_EQ(tsfile_table_writer_->flush(), common::E_OK); + ASSERT_EQ(tsfile_table_writer_->close(), common::E_OK); + + const int batch_size = 8; + std::vector timestamps = query_timestamps_in_batches( + table_schema, 0, 1000000000000LL, batch_size); + + ASSERT_EQ(timestamps.size(), static_cast(points_per_device)); + for (int64_t i = 0; i < points_per_device; ++i) { + EXPECT_EQ(timestamps[i], i); + } + + g_config_value_.page_writer_max_point_num_ = prev_page_point_num; + delete table_schema; +} + +TEST_F(TsFileTableReaderBatchTest, BatchQueryTimeFilterAcrossBoundaryPages) { + auto table_schema = gen_table_schema(); + auto tsfile_table_writer_ = + std::make_shared(&write_file_, table_schema); + + const int prev_page_point_num = g_config_value_.page_writer_max_point_num_; + 
g_config_value_.page_writer_max_point_num_ = 8; + + const int device_num = 1; + const int points_per_device = 25; + auto tablet = gen_tablet(table_schema, 0, device_num, points_per_device); + ASSERT_EQ(tsfile_table_writer_->write_table(tablet), common::E_OK); + ASSERT_EQ(tsfile_table_writer_->flush(), common::E_OK); + ASSERT_EQ(tsfile_table_writer_->close(), common::E_OK); + + const int batch_size = 4; + std::vector timestamps = + query_timestamps_in_batches(table_schema, 5, 18, batch_size); + + ASSERT_EQ(timestamps.size(), static_cast(14)); + for (int64_t i = 0; i < 14; ++i) { + EXPECT_EQ(timestamps[i], i + 5); + } + + g_config_value_.page_writer_max_point_num_ = prev_page_point_num; + delete table_schema; +} + +TEST_F(TsFileTableReaderBatchTest, + BatchQueryVariableLengthFieldAcrossTsBlocks) { + auto table_schema = gen_table_schema_with_string_field(); + auto tsfile_table_writer_ = + std::make_shared(&write_file_, table_schema); + + const int prev_page_point_num = g_config_value_.page_writer_max_point_num_; + g_config_value_.page_writer_max_point_num_ = 8; + + const int num_rows = 23; + auto tablet = gen_tablet_with_string_field(table_schema, num_rows); + ASSERT_EQ(tsfile_table_writer_->write_table(tablet), common::E_OK); + ASSERT_EQ(tsfile_table_writer_->flush(), common::E_OK); + ASSERT_EQ(tsfile_table_writer_->close(), common::E_OK); + + auto result = query_string_field_in_batches(table_schema, 0, INT64_MAX, 5); + ASSERT_EQ(result.size(), static_cast(num_rows)); + for (int i = 0; i < num_rows; ++i) { + EXPECT_EQ(result[i].first, i); + EXPECT_EQ(result[i].second, "value_" + std::to_string(i)); + } + + g_config_value_.page_writer_max_point_num_ = prev_page_point_num; + delete table_schema; +} + TEST_F(TsFileTableReaderBatchTest, PerformanceComparisonSinglePointVsBatch) { // Create table schema without tags (only fields) auto table_schema = gen_table_schema_no_tag(); diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc 
b/cpp/test/reader/table_view/tsfile_reader_table_test.cc index 1f63573e1..b9f0eb213 100644 --- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc +++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc @@ -216,21 +216,6 @@ TEST_F(TsFileTableReaderTest, TableModelQueryOneSmallPage) { g_config_value_.page_writer_max_point_num_ = prev_config; } -// Triggers memory-based seal in aligned table: time page seals by size while -// value pages may not; ensure value pages are sealed together with time (no -// time-page-sealed / value-page-not-sealed inconsistency). -// Use 512 bytes so time seals by size before point count; 128 was too small -// and could produce misaligned time/value pages on some encodings. -TEST_F(TsFileTableReaderTest, TableModelQueryMemoryBasedSeal) { - uint32_t prev_point_num = g_config_value_.page_writer_max_point_num_; - uint32_t prev_mem_bytes = g_config_value_.page_writer_max_memory_bytes_; - g_config_value_.page_writer_max_point_num_ = 10000; - g_config_value_.page_writer_max_memory_bytes_ = 512; - test_table_model_query(50, 1); - g_config_value_.page_writer_max_point_num_ = prev_point_num; - g_config_value_.page_writer_max_memory_bytes_ = prev_mem_bytes; -} - TEST_F(TsFileTableReaderTest, TableModelQueryOneLargePage) { int prev_config = g_config_value_.page_writer_max_point_num_; g_config_value_.page_writer_max_point_num_ = 10000; @@ -803,86 +788,3 @@ TEST_F(TsFileTableReaderTest, TestTimeColumnReader) { reader.destroy_query_data_set(table_result_set); ASSERT_EQ(reader.close(), common::E_OK); } - -// Regression test: AlignedChunkReader NULL branch overflow drops rows. -// When a TsBlock is full (block_size=1024) and the next row to decode is a -// NULL value in aligned data, the old code consumed the timestamp before -// checking add_row(), silently losing that row on E_OVERFLOW. -TEST_F(TsFileTableReaderTest, AlignedNullAtBlockBoundaryNoRowLoss) { - // block_size in RETURN_ROW mode is 1024. 
- const int32_t block_size = 1024; - // Write enough rows so that overflow happens multiple times, - // and place NULLs exactly at every block boundary. - const int32_t total_rows = block_size * 4; // 4096 rows - - std::string table_name = "null_boundary"; - auto* schema = new storage::TableSchema( - table_name, - { - common::ColumnSchema("tag1", common::TSDataType::STRING, - common::ColumnCategory::TAG), - // s_nullable: NULL at every block_size boundary - common::ColumnSchema("s_nullable", common::TSDataType::INT64, - common::ColumnCategory::FIELD), - // s_full: always has a value (control group) - common::ColumnSchema("s_full", common::TSDataType::INT64, - common::ColumnCategory::FIELD), - }); - - auto* writer = - new storage::TsFileTableWriter(&write_file_, schema, 128 * 1024 * 1024); - - storage::Tablet tablet( - {"tag1", "s_nullable", "s_full"}, - {common::TSDataType::STRING, common::TSDataType::INT64, - common::TSDataType::INT64}, - total_rows); - - for (int32_t i = 0; i < total_rows; i++) { - tablet.add_timestamp(i, static_cast(i)); - tablet.add_value(i, "tag1", "device0"); - tablet.add_value(i, "s_full", static_cast(i)); - // Make row at every block_size boundary NULL for s_nullable. - // These are exactly the rows that trigger E_OVERFLOW in the decoder. - if (i % block_size != 0) { - tablet.add_value(i, "s_nullable", static_cast(i)); - } - // else: s_nullable is NULL at i=0, 1024, 2048, 3072 - } - - ASSERT_EQ(writer->write_table(tablet), common::E_OK); - ASSERT_EQ(writer->flush(), common::E_OK); - ASSERT_EQ(writer->close(), common::E_OK); - delete writer; - delete schema; - - storage::TsFileReader reader; - ASSERT_EQ(reader.open(file_name_), common::E_OK); - - // Helper: query a single column and count rows. 
- auto count_rows = [&](const std::string& col) -> int64_t { - storage::ResultSet* rs = nullptr; - int ret = reader.query(table_name, {col}, 0, INT64_MAX, rs); - EXPECT_EQ(ret, common::E_OK); - if (rs == nullptr) return -1; - auto* trs = dynamic_cast(rs); - bool hn = false; - int64_t cnt = 0; - while (trs->next(hn) == common::E_OK && hn) { - cnt++; - } - reader.destroy_query_data_set(rs); - return cnt; - }; - - int64_t full_rows = count_rows("s_full"); - int64_t nullable_rows = count_rows("s_nullable"); - - // Both columns must return the same number of rows. - // Before the fix, s_nullable would lose one row per overflow at a NULL - // boundary, yielding fewer rows than s_full. - ASSERT_EQ(full_rows, total_rows); - ASSERT_EQ(nullable_rows, total_rows); - - ASSERT_EQ(reader.close(), common::E_OK); -} \ No newline at end of file diff --git a/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc b/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc index 026f75b2d..4e6d1a86d 100644 --- a/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc +++ b/cpp/test/reader/table_view/tsfile_table_query_by_row_test.cc @@ -27,7 +27,6 @@ #include "common/schema.h" #include "common/tablet.h" #include "file/write_file.h" -#include "reader/filter/tag_filter.h" #include "reader/table_result_set.h" #include "reader/tsfile_reader.h" #include "writer/tsfile_table_writer.h" @@ -103,6 +102,41 @@ class TableQueryByRowTest : public ::testing::Test { delete schema; } + void write_single_device_file_with_string_field(int num_rows) { + std::vector col_schemas = { + ColumnSchema("id1", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::TAG), + ColumnSchema("s_text", TSDataType::STRING, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::FIELD), + ColumnSchema("s_num", TSDataType::INT64, + CompressionType::UNCOMPRESSED, TSEncoding::PLAIN, + ColumnCategory::FIELD), + }; + auto* schema = new TableSchema("t_string", 
col_schemas); + auto* writer = new TsFileTableWriter(&write_file_, schema); + + Tablet tablet( + "t_string", {"id1", "s_text", "s_num"}, + {TSDataType::STRING, TSDataType::STRING, TSDataType::INT64}, + {ColumnCategory::TAG, ColumnCategory::FIELD, ColumnCategory::FIELD}, + num_rows); + + for (int i = 0; i < num_rows; i++) { + tablet.add_timestamp(i, static_cast(i)); + tablet.add_value(i, "id1", "device_a"); + tablet.add_value(i, "s_text", "value_" + std::to_string(i)); + tablet.add_value(i, "s_num", static_cast(i * 10)); + } + + ASSERT_EQ(writer->write_table(tablet), E_OK); + ASSERT_EQ(writer->flush(), E_OK); + ASSERT_EQ(writer->close(), E_OK); + delete writer; + delete schema; + } + void write_multi_device_file(int rows_per_device, int device_count) { std::vector col_schemas = { ColumnSchema("id1", TSDataType::STRING, @@ -341,6 +375,29 @@ class TableQueryByRowTest : public ::testing::Test { return manual; } + std::vector> query_by_row_time_and_text( + const std::string& table_name, const std::vector& cols, + int offset, int limit) { + TsFileReader reader; + EXPECT_EQ(reader.open(file_name_), E_OK); + ResultSet* rs = nullptr; + EXPECT_EQ(reader.queryByRow(table_name, cols, offset, limit, rs), E_OK); + EXPECT_NE(rs, nullptr); + + std::vector> result; + bool has_next = false; + while (IS_SUCC(rs->next(has_next)) && has_next) { + int64_t time = rs->get_value("time"); + common::String* text_val = rs->get_value("s_text"); + result.emplace_back(time, + std::string(text_val->buf_, text_val->len_)); + } + + reader.destroy_query_data_set(rs); + reader.close(); + return result; + } + std::string file_name_; WriteFile write_file_; }; @@ -356,6 +413,23 @@ TEST_F(TableQueryByRowTest, NoOffsetNoLimit) { ASSERT_EQ(result, all); } +TEST_F(TableQueryByRowTest, NoOffsetNoLimitWithSmallPages) { + int prev_page_config = g_config_value_.page_writer_max_point_num_; + g_config_value_.page_writer_max_point_num_ = 8; + + int num_rows = 25; + write_single_device_file(num_rows); + + auto 
result = query_by_row_time_and_s1("t1", {"id1", "s1", "s2"}, 0, -1); + ASSERT_EQ(result.size(), static_cast(num_rows)); + for (int i = 0; i < num_rows; ++i) { + EXPECT_EQ(result[i].first, i); + EXPECT_EQ(result[i].second, i * 10); + } + + g_config_value_.page_writer_max_point_num_ = prev_page_config; +} + // Offset only: skip first N rows, return the rest; limit=-1 means no cap. TEST_F(TableQueryByRowTest, OffsetOnly) { int num_rows = 50; @@ -399,6 +473,43 @@ TEST_F(TableQueryByRowTest, OffsetAndLimit) { } } +TEST_F(TableQueryByRowTest, OffsetAndLimitWithSmallPages) { + int prev_page_config = g_config_value_.page_writer_max_point_num_; + g_config_value_.page_writer_max_point_num_ = 8; + + int num_rows = 40; + write_single_device_file(num_rows); + + int offset = 7; + int limit = 19; + auto by_row = + query_by_row_time_and_s1("t1", {"id1", "s1", "s2"}, offset, limit); + auto manual = + query_manual_time_and_s1("t1", {"id1", "s1", "s2"}, offset, limit); + + ASSERT_EQ(by_row, manual); + + g_config_value_.page_writer_max_point_num_ = prev_page_config; +} + +TEST_F(TableQueryByRowTest, VariableLengthFieldWithSmallPages) { + int prev_page_config = g_config_value_.page_writer_max_point_num_; + g_config_value_.page_writer_max_point_num_ = 8; + + int num_rows = 21; + write_single_device_file_with_string_field(num_rows); + + auto result = query_by_row_time_and_text("t_string", + {"id1", "s_text", "s_num"}, 0, -1); + ASSERT_EQ(result.size(), static_cast(num_rows)); + for (int i = 0; i < num_rows; ++i) { + EXPECT_EQ(result[i].first, i); + EXPECT_EQ(result[i].second, "value_" + std::to_string(i)); + } + + g_config_value_.page_writer_max_point_num_ = prev_page_config; +} + // Offset beyond total row count: returns empty result. 
TEST_F(TableQueryByRowTest, OffsetBeyondData) { int num_rows = 30; @@ -652,15 +763,16 @@ TEST_F(TableQueryByRowTest, DenseSingleDeviceSsiLevelPushdown) { // Pushdown is faster than full query + manual next: queryByRow(offset, limit) // skips at device/SSI/Chunk level; old query then manual next decodes every -// row. Timing tolerance 20% to allow measurement noise. -TEST_F(TableQueryByRowTest, DISABLED_QueryByRowFasterThanManualNext) { - const int num_rows = 8000; - const int offset = 3000; +// row. Timing tolerance 5% to allow measurement noise. +TEST_F(TableQueryByRowTest, QueryByRowFasterThanManualNext) { + const int num_rows = 80000; + const int offset = 30000; const int limit = 1000; write_single_device_file(num_rows); const int num_iters = 5; - const double tolerance = 0.2; + const double tolerance = + 0.5; // 50% tolerance for cross-platform timing noise auto run_query_by_row = [this, offset, limit]() { TsFileReader reader; @@ -725,47 +837,3 @@ TEST_F(TableQueryByRowTest, DISABLED_QueryByRowFasterThanManualNext) { "(min_by_row=" << min_by_row << " ms, min_manual=" << min_manual << " ms)"; } - -// queryByRow with tag filter: only rows matching the tag predicate are -// returned. -TEST_F(TableQueryByRowTest, TagFilterEq) { - int rows_per_device = 20; - int device_count = 3; - write_multi_device_file(rows_per_device, device_count); - - // Reconstruct the same schema used by write_multi_device_file. 
- std::vector col_schemas = { - ColumnSchema("id1", TSDataType::STRING, CompressionType::UNCOMPRESSED, - TSEncoding::PLAIN, ColumnCategory::TAG), - ColumnSchema("s1", TSDataType::INT64, CompressionType::UNCOMPRESSED, - TSEncoding::PLAIN, ColumnCategory::FIELD), - }; - TableSchema schema("t1", col_schemas); - - // Build tag filter: id1 == "dev1" - TagFilterBuilder builder(&schema); - Filter* tag_filter = builder.eq("id1", "dev1"); - - TsFileReader reader; - ASSERT_EQ(reader.open(file_name_), E_OK); - - ResultSet* rs = nullptr; - ASSERT_EQ(reader.queryByRow("t1", {"id1", "s1"}, 0, -1, rs, tag_filter), - E_OK); - ASSERT_NE(rs, nullptr); - - std::vector filtered_s1; - bool has_next = false; - while (IS_SUCC(rs->next(has_next)) && has_next) { - filtered_s1.push_back(rs->get_value("s1")); - } - reader.destroy_query_data_set(rs); - reader.close(); - delete tag_filter; - - // dev1 has rows_per_device rows with s1 = 1*1000+t for t in [0,20). - ASSERT_EQ(filtered_s1.size(), static_cast(rows_per_device)); - for (int t = 0; t < rows_per_device; t++) { - EXPECT_EQ(filtered_s1[t], static_cast(1 * 1000 + t)); - } -} diff --git a/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc b/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc index 8181b6130..aa4ff2544 100644 --- a/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc +++ b/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc @@ -24,7 +24,6 @@ #include "common/schema.h" #include "common/tablet.h" #include "file/write_file.h" -#include "reader/result_set.h" #include "reader/tsfile_reader.h" #include "reader/tsfile_tree_reader.h" #include "writer/tsfile_table_writer.h" @@ -426,86 +425,3 @@ TEST_F(TsFileTreeReaderTest, ExtendedRowsAndColumnsTest) { delete measurement; } } - -// Regression test: query_table_on_tree on a device path with three or more -// dot-segments (e.g. "root.sensors.TH") previously SEGVed because: -// 1. 
StringArrayDeviceID split "root.sensors.TH" into ["root","sensors","TH"] -// instead of the correct ["root.sensors","TH"], so get_table_name() returned -// "root" instead of "root.sensors". -// 2. load_device_index_entry used operator[] on the table map which inserted a -// null entry, then asserted on it. -TEST_F(TsFileTreeReaderTest, QueryTableOnTreeDeepDevicePath) { - TsFileTreeWriter writer(&write_file_); - // Device paths with 3 dot-segments: table_name="root.sensors", device="TH" - std::string device_id = "root.sensors.TH"; - std::string m_temp = "temperature"; - std::string m_humi = "humidity"; - auto* ms_temp = new MeasurementSchema(m_temp, INT32); - auto* ms_humi = new MeasurementSchema(m_humi, INT32); - ASSERT_EQ(E_OK, writer.register_timeseries(device_id, ms_temp)); - ASSERT_EQ(E_OK, writer.register_timeseries(device_id, ms_humi)); - delete ms_temp; - delete ms_humi; - - for (int ts = 0; ts < 5; ts++) { - TsRecord rec(device_id, ts); - rec.add_point(m_temp, static_cast(20 + ts)); - rec.add_point(m_humi, static_cast(50 + ts)); - ASSERT_EQ(E_OK, writer.write(rec)); - } - writer.flush(); - writer.close(); - - TsFileReader reader; - ASSERT_EQ(E_OK, reader.open(file_name_)); - ResultSet* result; - // query_table_on_tree used to SEGV here due to wrong table-name lookup - ASSERT_EQ(E_OK, reader.query_table_on_tree({m_temp, m_humi}, INT64_MIN, - INT64_MAX, result)); - - auto* trs = static_cast(result); - bool has_next = false; - int row_cnt = 0; - while (IS_SUCC(trs->next(has_next)) && has_next) { - row_cnt++; - } - EXPECT_EQ(row_cnt, 5); - reader.destroy_query_data_set(result); - reader.close(); -} - -// Regression test: load_device_index_entry previously used operator[] to look -// up the table node, which silently inserted a null entry and then asserted. -// After the fix it uses find() and returns E_DEVICE_NOT_EXIST gracefully. -// This is triggered when querying a measurement that no device in the file has. 
-TEST_F(TsFileTreeReaderTest, QueryTableOnTreeMissingMeasurement) { - // Use the same multi-device setup as ReadTreeByTable to ensure a valid - // file. - TsFileTreeWriter writer(&write_file_); - std::vector device_ids = {"root.db1.t1", "root.db2.t1"}; - std::string m_temp = "temperature"; - for (auto dev : device_ids) { - auto* ms = new MeasurementSchema(m_temp, INT32); - ASSERT_EQ(E_OK, writer.register_timeseries(dev, ms)); - delete ms; - TsRecord rec(dev, 0); - rec.add_point(m_temp, static_cast(25)); - ASSERT_EQ(E_OK, writer.write(rec)); - } - writer.flush(); - writer.close(); - - TsFileReader reader; - ASSERT_EQ(E_OK, reader.open(file_name_)); - ResultSet* result = nullptr; - // "nonexistent" is not present in any device. Before the fix, - // load_device_index_entry used operator[] which inserted null and crashed. - // After the fix it returns E_DEVICE_NOT_EXIST or E_COLUMN_NOT_EXIST. - int ret = reader.query_table_on_tree({"nonexistent"}, INT64_MIN, INT64_MAX, - result); - EXPECT_NE(ret, E_OK); // Must not succeed (measurement not found) - if (result != nullptr) { - reader.destroy_query_data_set(result); - } - reader.close(); -} diff --git a/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc b/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc index a686b8998..56f8c113a 100644 --- a/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc +++ b/cpp/test/reader/tree_view/tsfile_tree_query_by_row_test.cc @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. 
*/ -#include #include #include @@ -25,114 +24,14 @@ #include "common/global.h" #include "common/record.h" #include "common/schema.h" -#include "common/tablet.h" #include "file/write_file.h" #include "reader/tsfile_reader.h" #include "reader/tsfile_tree_reader.h" #include "writer/tsfile_tree_writer.h" -#include "writer/tsfile_writer.h" using namespace storage; using namespace common; -namespace { - -int write_multi_device_data_tablet( - const std::vector>>& - devices_and_measurements, - const std::vector& data_types, int row_count, - const std::string& file_path) { - TsFileWriter tsfile_writer; - int flags = O_WRONLY | O_CREAT | O_TRUNC; -#ifdef _WIN32 - flags |= O_BINARY; -#endif - mode_t mode = 0666; - int ret = tsfile_writer.open(file_path, flags, mode); - if (ret != E_OK) { - return ret; - } - for (auto& device_pair : devices_and_measurements) { - const std::vector& measurements = device_pair.second; - if (measurements.size() != data_types.size()) { - return E_INVALID_ARG; - } - } - for (auto& device_pair : devices_and_measurements) { - const std::string& device_id = device_pair.first; - const std::vector& measurements = device_pair.second; - for (size_t i = 0; i < measurements.size(); i++) { - MeasurementSchema schema(measurements[i], data_types[i]); - ret = tsfile_writer.register_timeseries(device_id, schema); - if (ret != E_OK) { - return ret; - } - } - } - for (auto& device_pair : devices_and_measurements) { - const std::string& device_id = device_pair.first; - const std::vector& measurements = device_pair.second; - auto schema_ptr = std::make_shared>(); - for (size_t i = 0; i < measurements.size(); i++) { - schema_ptr->emplace_back(measurements[i], data_types[i]); - } - Tablet tablet(device_id, schema_ptr, row_count); - for (int row = 0; row < row_count; row++) { - ret = tablet.add_timestamp(row, row); - if (ret != E_OK) { - return ret; - } - for (size_t col = 0; col < measurements.size(); col++) { - if ((static_cast(row) % 2) == (col % 2)) { - continue; - 
} - switch (data_types[col]) { - case BOOLEAN: - ret = tablet.add_value(row, col, (row % 2 != 0)); - break; - case INT32: - ret = tablet.add_value(row, col, - static_cast(row)); - break; - case INT64: - ret = tablet.add_value(row, col, - static_cast(row)); - break; - case FLOAT: - ret = - tablet.add_value(row, col, static_cast(row)); - break; - case DOUBLE: - ret = tablet.add_value(row, col, - static_cast(row)); - break; - case STRING: { - std::string val_str = "string" + std::to_string(row); - ret = tablet.add_value(row, col, val_str.c_str()); - break; - } - default: - return E_TYPE_NOT_MATCH; - } - if (ret != E_OK) { - return ret; - } - } - } - ret = tsfile_writer.write_tablet(tablet); - if (ret != E_OK) { - return ret; - } - } - ret = tsfile_writer.flush(); - if (ret != E_OK) { - return ret; - } - return tsfile_writer.close(); -} - -} // namespace - class TreeQueryByRowTest : public ::testing::Test { protected: void SetUp() override { @@ -234,113 +133,6 @@ TEST_F(TreeQueryByRowTest, NoOffsetNoLimit) { reader.close(); } -// queryByRow skips paths whose device or measurement is missing in the file; -// only existing series are returned (aligned with Java tree reader). 
-TEST_F(TreeQueryByRowTest, QueryByRow_SkipsMissingDeviceAndMeasurement) { - std::vector devices = {"d1"}; - std::vector measurements = {"s1"}; - const int num_rows = 5; - write_test_file(devices, measurements, num_rows); - - TsFileTreeReader reader; - ASSERT_EQ(E_OK, reader.open(file_name_)); - - ResultSet* result = nullptr; - std::vector q_devices = {"d1", "d999"}; - std::vector q_meas = {"s1", "ghost_m"}; - ASSERT_EQ(E_OK, reader.queryByRow(q_devices, q_meas, 0, -1, result)); - ASSERT_NE(result, nullptr); - - auto meta = result->get_metadata(); - ASSERT_EQ(2u, meta->get_column_count()); - - bool has_next = false; - int row_count = 0; - while (IS_SUCC(result->next(has_next)) && has_next) { - RowRecord* rr = result->get_row_record(); - int64_t ts = rr->get_timestamp(); - ASSERT_EQ(ts, static_cast(row_count)); - Field* f = rr->get_field(1); - ASSERT_NE(f, nullptr); - ASSERT_EQ(f->type_, INT64); - EXPECT_EQ(f->get_value(), static_cast(ts * 100 + 0)); - row_count++; - } - EXPECT_EQ(row_count, num_rows); - - reader.destroy_query_data_set(result); - reader.close(); -} - -TEST_F(TreeQueryByRowTest, QueryByRow_TabletMultiType_PartialPaths) { - std::string tablet_path = std::string("tree_query_by_row_tablet_") + - generate_random_string(10) + ".tsfile"; - remove(tablet_path.c_str()); - - std::vector devices = {"root.db.d1"}; - std::vector measurement_names = {"bool_col", "int32_col", - "int64_col", "float_col", - "double_col", "string_col"}; - std::vector>> - devices_and_measurements = {{devices[0], measurement_names}}; - std::vector data_types = {BOOLEAN, INT32, INT64, - FLOAT, DOUBLE, STRING}; - const int total_rows = 10; - ASSERT_EQ(E_OK, write_multi_device_data_tablet(devices_and_measurements, - data_types, total_rows, - tablet_path)); - - TsFileTreeReader reader; - ASSERT_EQ(E_OK, reader.open(tablet_path)); - - std::vector q_devices = {devices[0], "d999"}; - std::vector q_meas = {measurement_names[0], - measurement_names[1], "ghost_m"}; - ResultSet* result_set2 = 
nullptr; - ASSERT_EQ(E_OK, reader.queryByRow(q_devices, q_meas, 0, -1, result_set2)); - ASSERT_NE(result_set2, nullptr); - auto meta2 = result_set2->get_metadata(); - // Metadata includes the time column plus one entry per resolved series. - ASSERT_EQ(3u, meta2->get_column_count()); - - bool has_next = false; - int row_count = 0; - while (IS_SUCC(result_set2->next(has_next)) && has_next) { - row_count++; - } - EXPECT_EQ(row_count, total_rows); - - reader.destroy_query_data_set(result_set2); - ASSERT_EQ(E_OK, reader.close()); - remove(tablet_path.c_str()); -} - -// Device id with three dot-separated parts (e.g. root.sg1.FeederA) must resolve -// to the same StringArrayDeviceID normalization as write path; queryByRow must -// not return E_DEVICE_NOT_EXIST. -TEST_F(TreeQueryByRowTest, QueryByRow_MultiSegmentDeviceId) { - std::vector devices = {"root.sg1.FeederA"}; - std::vector measurements = {"s1"}; - int num_rows = 10; - write_test_file(devices, measurements, num_rows); - - TsFileTreeReader reader; - ASSERT_EQ(E_OK, reader.open(file_name_)); - - ResultSet* result = nullptr; - ASSERT_EQ(E_OK, reader.queryByRow(devices, measurements, 0, 5, result)); - ASSERT_NE(result, nullptr); - - auto timestamps = collect_timestamps(result); - ASSERT_EQ(timestamps.size(), 5u); - for (int i = 0; i < 5; ++i) { - EXPECT_EQ(timestamps[i], i); - } - - reader.destroy_query_data_set(result); - reader.close(); -} - // Test: offset skips leading rows. TEST_F(TreeQueryByRowTest, OffsetOnly) { std::vector devices = {"d1"}; @@ -1310,8 +1102,8 @@ TEST_F(TreeQueryByRowTest, MultiPath_TimeHint_SkipsStaleChunk_WithOffset) { // Pushdown is faster than full query + manual next: queryByRow(offset, limit) // skips at Chunk/Page level; old query then manual next decodes every row. -// Timing tolerance 20% to allow measurement noise. -TEST_F(TreeQueryByRowTest, DISABLED_QueryByRowFasterThanManualNext) { +// Timing tolerance 5% to allow measurement noise. 
+TEST_F(TreeQueryByRowTest, QueryByRowFasterThanManualNext) { std::vector devices = {"d1"}; std::vector measurements = {"s1"}; const int num_rows = 8000; @@ -1320,7 +1112,7 @@ TEST_F(TreeQueryByRowTest, DISABLED_QueryByRowFasterThanManualNext) { write_test_file(devices, measurements, num_rows); const int num_iters = 5; - const double tolerance = 0.2; + const double tolerance = 0.05; auto run_query_by_row = [this, &devices, &measurements, offset, limit]() { TsFileTreeReader reader; diff --git a/cpp/test/reader/tsfile_reader_test.cc b/cpp/test/reader/tsfile_reader_test.cc index 45261cf45..897880862 100644 --- a/cpp/test/reader/tsfile_reader_test.cc +++ b/cpp/test/reader/tsfile_reader_test.cc @@ -21,9 +21,7 @@ #include #include -#include #include -#include #include #include "common/record.h" @@ -266,141 +264,10 @@ TEST_F(TsFileReaderTest, GetTimeseriesSchema) { reader.close(); } -TEST_F(TsFileReaderTest, GetTimeseriesMetadataTableModelTypeAndDeviceFilter) { - std::vector measurement_schemas = { - new MeasurementSchema("deviceid1", TSDataType::STRING), - new MeasurementSchema("deviceid2", TSDataType::STRING), - new MeasurementSchema("temperature", TSDataType::FLOAT), - new MeasurementSchema("pressure", TSDataType::DOUBLE), - new MeasurementSchema("humidity", TSDataType::INT32)}; - std::vector column_categories = { - ColumnCategory::TAG, ColumnCategory::TAG, ColumnCategory::FIELD, - ColumnCategory::FIELD, ColumnCategory::FIELD}; - auto table_schema = std::make_shared( - "testtable", measurement_schemas, column_categories); - - ASSERT_EQ(tsfile_writer_->register_table(table_schema), E_OK); - - Tablet tablet(table_schema->get_table_name(), - table_schema->get_measurement_names(), - table_schema->get_data_types(), - table_schema->get_column_categories(), 10); - for (int row = 0; row < 5; row++) { - ASSERT_EQ(tablet.add_timestamp(row, row), E_OK); - ASSERT_EQ(tablet.add_value(row, "deviceid1", "device_a"), E_OK); - ASSERT_EQ(tablet.add_value(row, "deviceid2", "device_b"), 
E_OK); - ASSERT_EQ(tablet.add_value(row, "temperature", static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "pressure", static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "humidity", static_cast(row)), - E_OK); - } - for (int row = 5; row < 10; row++) { - ASSERT_EQ(tablet.add_timestamp(row, row), E_OK); - ASSERT_EQ(tablet.add_value(row, "deviceid1", "device_b"), E_OK); - ASSERT_EQ(tablet.add_value(row, "deviceid2", "device_a"), E_OK); - ASSERT_EQ(tablet.add_value(row, "temperature", static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "pressure", static_cast(row)), - E_OK); - ASSERT_EQ(tablet.add_value(row, "humidity", static_cast(row)), - E_OK); - } - - // Append one row whose middle TAG segment is null. - Tablet null_tag_tablet(table_schema->get_table_name(), - table_schema->get_measurement_names(), - table_schema->get_data_types(), - table_schema->get_column_categories(), 1); - int64_t null_tag_ts[1] = {10}; - int32_t null_tag_humidity[1] = {10}; - float null_tag_temperature[1] = {10.0F}; - double null_tag_pressure[1] = {10.0}; - // deviceid1 = null - int32_t id1_offsets[2] = {0, 0}; - uint8_t id1_bitmap[1] = {0x01}; // row0 is null - // deviceid2 = "device_b" - int32_t id2_offsets[2] = {0, 8}; - const char id2_data[] = "device_b"; - ASSERT_EQ(null_tag_tablet.set_timestamps(null_tag_ts, 1), E_OK); - ASSERT_EQ(null_tag_tablet.set_column_string_values(0, id1_offsets, "", - id1_bitmap, 1), - E_OK); - ASSERT_EQ(null_tag_tablet.set_column_string_values(1, id2_offsets, id2_data, - nullptr, 1), - E_OK); - ASSERT_EQ( - null_tag_tablet.set_column_values(2, null_tag_temperature, nullptr, 1), - E_OK); - ASSERT_EQ( - null_tag_tablet.set_column_values(3, null_tag_pressure, nullptr, 1), - E_OK); - ASSERT_EQ( - null_tag_tablet.set_column_values(4, null_tag_humidity, nullptr, 1), - E_OK); - - ASSERT_EQ(tsfile_writer_->write_table(tablet), E_OK); - ASSERT_EQ(tsfile_writer_->write_table(null_tag_tablet), E_OK); - ASSERT_EQ(tsfile_writer_->flush(), 
E_OK); - ASSERT_EQ(tsfile_writer_->close(), E_OK); - - storage::TsFileReader reader; - ASSERT_EQ(reader.open(file_name_), common::E_OK); - - auto all_meta = reader.get_timeseries_metadata(); - ASSERT_EQ(all_meta.size(), 3u); - - std::vector selected_device_segments = { - "testtable", "device_a", "device_b"}; - std::vector> selected_devices = { - std::make_shared(selected_device_segments)}; - auto selected_meta = reader.get_timeseries_metadata(selected_devices); - ASSERT_EQ(selected_meta.size(), 1u); - - auto selected_list = selected_meta.begin()->second; - std::unordered_map type_by_measurement; - for (const auto& index : selected_list) { - type_by_measurement[index->get_measurement_name().to_std_string()] = - index->get_data_type(); - } - ASSERT_EQ(type_by_measurement.at("temperature"), TSDataType::FLOAT); - ASSERT_EQ(type_by_measurement.at("pressure"), TSDataType::DOUBLE); - ASSERT_EQ(type_by_measurement.at("humidity"), TSDataType::INT32); - - // Query metadata for the device with null middle TAG segment. 
- std::vector null_seg_device = { - new std::string("testtable"), nullptr, new std::string("device_b")}; - std::vector> null_seg_devices = { - std::make_shared(null_seg_device)}; - for (auto* seg : null_seg_device) { - if (seg != nullptr) { - delete seg; - } - } - auto null_seg_meta = reader.get_timeseries_metadata(null_seg_devices); - ASSERT_EQ(null_seg_meta.size(), 1u); - auto null_seg_list = null_seg_meta.begin()->second; - ASSERT_EQ(null_seg_list.size(), 3u); - std::unordered_map null_seg_type_by_measurement; - for (const auto& index : null_seg_list) { - null_seg_type_by_measurement[index->get_measurement_name() - .to_std_string()] = - index->get_data_type(); - } - ASSERT_EQ(null_seg_type_by_measurement.at("temperature"), - TSDataType::FLOAT); - ASSERT_EQ(null_seg_type_by_measurement.at("pressure"), TSDataType::DOUBLE); - ASSERT_EQ(null_seg_type_by_measurement.at("humidity"), TSDataType::INT32); - - reader.close(); -} - static const int64_t kLargeFileNumRecords = 300000000; static const int64_t kLargeFileFlushBatch = 100000; -TEST_F(TsFileReaderTest, - DISABLED_LargeFileNoEncodingNoCompression_WriteAndRead) { +TEST_F(TsFileReaderTest, LargeFileNoEncodingNoCompression_WriteAndRead) { std::string device_path = "device1"; std::string measurement_name = "temperature"; common::TSDataType data_type = common::TSDataType::INT64; diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc index d1f3b92e4..293225584 100644 --- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc +++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc @@ -20,7 +20,6 @@ #include -#include "common/global.h" #include "common/record.h" #include "common/schema.h" #include "common/tablet.h" @@ -32,11 +31,10 @@ using namespace storage; using namespace common; -class TsFileWriterTableTest : public ::testing::TestWithParam { +class TsFileWriterTableTest : public ::testing::Test { protected: void SetUp() override { 
libtsfile_init(); - set_parallel_write_enabled(GetParam()); file_name_ = std::string("tsfile_writer_table_test_") + generate_random_string(10) + std::string(".tsfile"); remove(file_name_.c_str()); @@ -135,7 +133,7 @@ class TsFileWriterTableTest : public ::testing::TestWithParam { } }; -TEST_P(TsFileWriterTableTest, WriteTableTest) { +TEST_F(TsFileWriterTableTest, WriteTableTest) { auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared(&write_file_, table_schema); @@ -146,7 +144,7 @@ TEST_P(TsFileWriterTableTest, WriteTableTest) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WithoutTagAndMultiPage) { +TEST_F(TsFileWriterTableTest, WithoutTagAndMultiPage) { std::vector measurement_schemas; std::vector column_categories; measurement_schemas.resize(1); @@ -194,7 +192,7 @@ TEST_P(TsFileWriterTableTest, WithoutTagAndMultiPage) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriteDisorderTest) { +TEST_F(TsFileWriterTableTest, WriteDisorderTest) { auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared(&write_file_, table_schema); @@ -244,7 +242,7 @@ TEST_P(TsFileWriterTableTest, WriteDisorderTest) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriteTableTestMultiFlush) { +TEST_F(TsFileWriterTableTest, WriteTableTestMultiFlush) { auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared( &write_file_, table_schema, 2 * 1024); @@ -257,7 +255,7 @@ TEST_P(TsFileWriterTableTest, WriteTableTestMultiFlush) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriteNonExistColumnTest) { +TEST_F(TsFileWriterTableTest, WriteNonExistColumnTest) { auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared(&write_file_, table_schema); @@ -285,7 +283,7 @@ TEST_P(TsFileWriterTableTest, WriteNonExistColumnTest) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriteNonExistTableTest) { +TEST_F(TsFileWriterTableTest, WriteNonExistTableTest) 
{ auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared(&write_file_, table_schema); @@ -297,7 +295,7 @@ TEST_P(TsFileWriterTableTest, WriteNonExistTableTest) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriterWithMemoryThreshold) { +TEST_F(TsFileWriterTableTest, WriterWithMemoryThreshold) { auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared( &write_file_, table_schema, 256 * 1024 * 1024); @@ -307,7 +305,7 @@ TEST_P(TsFileWriterTableTest, WriterWithMemoryThreshold) { delete table_schema; } -TEST_P(TsFileWriterTableTest, EmptyTagWrite) { +TEST_F(TsFileWriterTableTest, EmptyTagWrite) { std::vector measurement_schemas; std::vector column_categories; measurement_schemas.resize(3); @@ -363,7 +361,7 @@ TEST_P(TsFileWriterTableTest, EmptyTagWrite) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WritehDataTypeMisMatch) { +TEST_F(TsFileWriterTableTest, WritehDataTypeMisMatch) { auto table_schema = gen_table_schema(0); auto tsfile_table_writer_ = std::make_shared( &write_file_, table_schema, 256 * 1024 * 1024); @@ -414,7 +412,7 @@ TEST_P(TsFileWriterTableTest, WritehDataTypeMisMatch) { tsfile_table_writer_->close(); } -TEST_P(TsFileWriterTableTest, WriteAndReadSimple) { +TEST_F(TsFileWriterTableTest, WriteAndReadSimple) { std::vector measurement_schemas; std::vector column_categories; measurement_schemas.resize(2); @@ -469,7 +467,7 @@ TEST_P(TsFileWriterTableTest, WriteAndReadSimple) { delete table_schema; } -TEST_P(TsFileWriterTableTest, DuplicateColumnName) { +TEST_F(TsFileWriterTableTest, DuplicateColumnName) { std::vector measurement_schemas; std::vector column_categories; measurement_schemas.resize(3); @@ -507,7 +505,7 @@ TEST_P(TsFileWriterTableTest, DuplicateColumnName) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriteWithNullAndEmptyTag) { +TEST_F(TsFileWriterTableTest, WriteWithNullAndEmptyTag) { std::vector measurement_schemas; std::vector column_categories; for (int i 
= 0; i < 3; i++) { @@ -639,7 +637,7 @@ TEST_P(TsFileWriterTableTest, WriteWithNullAndEmptyTag) { ASSERT_EQ(reader.close(), common::E_OK); } -TEST_P(TsFileWriterTableTest, MultiDeviceMultiFields) { +TEST_F(TsFileWriterTableTest, MultiDeviceMultiFields) { common::config_set_max_degree_of_index_node(5); auto table_schema = gen_table_schema(0, 1, 100); auto tsfile_table_writer_ = @@ -698,7 +696,7 @@ TEST_P(TsFileWriterTableTest, MultiDeviceMultiFields) { delete table_schema; } -TEST_P(TsFileWriterTableTest, WriteDataWithEmptyField) { +TEST_F(TsFileWriterTableTest, WriteDataWithEmptyField) { std::vector measurement_schemas; std::vector column_categories; for (int i = 0; i < 3; i++) { @@ -775,7 +773,7 @@ TEST_P(TsFileWriterTableTest, WriteDataWithEmptyField) { ASSERT_EQ(reader.close(), common::E_OK); } -TEST_P(TsFileWriterTableTest, MultiDatatypes) { +TEST_F(TsFileWriterTableTest, MultiDatatypes) { std::vector measurement_schemas; std::vector column_categories; @@ -879,7 +877,7 @@ TEST_P(TsFileWriterTableTest, MultiDatatypes) { delete[] literal; } -TEST_P(TsFileWriterTableTest, DiffCodecTypes) { +TEST_F(TsFileWriterTableTest, DiffCodecTypes) { std::vector measurement_schemas; std::vector column_categories; @@ -987,7 +985,7 @@ TEST_P(TsFileWriterTableTest, DiffCodecTypes) { delete[] literal; } -TEST_P(TsFileWriterTableTest, EncodingConfigIntegration) { +TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { // 1. 
Test setting global compression type ASSERT_EQ(E_OK, set_global_compression(SNAPPY)); @@ -1100,7 +1098,7 @@ TEST_P(TsFileWriterTableTest, EncodingConfigIntegration) { } #ifdef ENABLE_MEM_STAT -TEST_P(TsFileWriterTableTest, DISABLED_MemStatWriteAndVerify) { +TEST_F(TsFileWriterTableTest, MemStatWriteAndVerify) { TableSchema* table_schema = gen_table_schema(0, 2, 3); auto tsfile_table_writer = std::make_shared(&write_file_, table_schema); @@ -1175,8 +1173,3 @@ TEST_P(TsFileWriterTableTest, DISABLED_MemStatWriteAndVerify) { delete table_schema; } #endif - -INSTANTIATE_TEST_SUITE_P(Serial, TsFileWriterTableTest, - ::testing::Values(false)); -INSTANTIATE_TEST_SUITE_P(Parallel, TsFileWriterTableTest, - ::testing::Values(true)); \ No newline at end of file diff --git a/cpp/test/writer/tsfile_writer_test.cc b/cpp/test/writer/tsfile_writer_test.cc index 285d926b1..92f5831ee 100644 --- a/cpp/test/writer/tsfile_writer_test.cc +++ b/cpp/test/writer/tsfile_writer_test.cc @@ -808,241 +808,6 @@ TEST_F(TsFileWriterTest, WriteAlignedTimeseries) { reader.destroy_query_data_set(qds); } -/* - * Aligned page seal synchronization tests. - * - * In the aligned model, time page and every value page must seal together - * so that each chunk has the same number of pages. Without synchronization, - * a threshold hit on one page (point-count or memory) would seal only that - * page, producing misaligned page counts and corrupt reads. - * - * Three sub-cases: - * 1. Time page reaches point-count threshold first; value pages have - * partial nulls so their non-null statistic count is lower and they - * would NOT seal on their own. - * 2. Time page reaches memory threshold first; value pages are mostly - * null so their encoded-data memory is much smaller. - * 3. A value page (STRING, large per-row memory) reaches memory - * threshold first; time page and other value pages have not. 
- */ - -// Case 1: time page seals by point-count; value pages with partial nulls -// have fewer non-null points (statistic count) and would not self-seal. -// Sync mechanism must force all value pages to seal together. -TEST_F(TsFileWriterTest, AlignedSealSync_PointCountWithNulls) { - uint32_t prev_pt = g_config_value_.page_writer_max_point_num_; - uint32_t prev_mem = g_config_value_.page_writer_max_memory_bytes_; - struct Guard { - uint32_t pt, mem; - ~Guard() { - g_config_value_.page_writer_max_point_num_ = pt; - g_config_value_.page_writer_max_memory_bytes_ = mem; - } - } guard{prev_pt, prev_mem}; - g_config_value_.page_writer_max_point_num_ = 10; - g_config_value_.page_writer_max_memory_bytes_ = 1024 * 1024; - - std::string device_name = "device_pt_null"; - std::vector mnames = {"s0", "s1", "s2"}; - std::vector schemas; - for (auto& n : mnames) { - schemas.push_back(new MeasurementSchema(n, INT64, PLAIN, UNCOMPRESSED)); - } - tsfile_writer_->register_aligned_timeseries(device_name, schemas); - - // s0: always non-null -> 10 non-null per 10-row page, self-seals - // s1: null on even rows -> 5 non-null per page, won't self-seal - // s2: null except every 5th row -> 2 non-null per page, won't self-seal - int row_num = 30; - for (int i = 0; i < row_num; ++i) { - TsRecord record(1622505600000 + i, device_name); - record.add_point(mnames[0], static_cast(i)); - if (i % 2 != 0) { - record.add_point(mnames[1], static_cast(i * 10)); - } else { - record.points_.emplace_back(DataPoint(mnames[1])); - } - if (i % 5 == 0) { - record.add_point(mnames[2], static_cast(i * 100)); - } else { - record.points_.emplace_back(DataPoint(mnames[2])); - } - ASSERT_EQ(tsfile_writer_->write_record_aligned(record), E_OK); - } - ASSERT_EQ(tsfile_writer_->flush(), E_OK); - ASSERT_EQ(tsfile_writer_->close(), E_OK); - - std::vector select_list; - for (auto& n : mnames) { - select_list.emplace_back(device_name, n); - } - storage::QueryExpression* qe = - 
storage::QueryExpression::create(select_list, nullptr); - storage::TsFileReader reader; - ASSERT_EQ(reader.open(file_name_), E_OK); - storage::ResultSet* tmp_qds = nullptr; - ASSERT_EQ(reader.query(qe, tmp_qds), E_OK); - auto* qds = (QDSWithoutTimeGenerator*)tmp_qds; - - bool has_next = false; - int64_t cur_row = 0; - while (IS_SUCC(qds->next(has_next)) && has_next) { - auto* rec = qds->get_row_record(); - ASSERT_NE(rec, nullptr); - EXPECT_EQ(rec->get_timestamp(), 1622505600000 + cur_row); - EXPECT_EQ(field_to_string(rec->get_field(1)), std::to_string(cur_row)); - if (cur_row % 2 != 0) { - EXPECT_EQ(field_to_string(rec->get_field(2)), - std::to_string(cur_row * 10)); - } - if (cur_row % 5 == 0) { - EXPECT_EQ(field_to_string(rec->get_field(3)), - std::to_string(cur_row * 100)); - } - cur_row++; - } - EXPECT_EQ(cur_row, row_num); - reader.destroy_query_data_set(qds); - ASSERT_EQ(reader.close(), E_OK); -} - -// Case 2: time page seals by memory threshold first. Value pages are mostly -// null so their encoded-value memory grows much slower than the time page -// (INT64 PLAIN = 8 bytes/point). Time page hits 512 bytes at ~64 points; -// value pages with 1 non-null every 20 rows only have ~24 bytes of value -// data at that point. Sync must force all value pages to seal. 
-TEST_F(TsFileWriterTest, AlignedSealSync_TimeMemoryFirst) { - uint32_t prev_pt = g_config_value_.page_writer_max_point_num_; - uint32_t prev_mem = g_config_value_.page_writer_max_memory_bytes_; - struct Guard { - uint32_t pt, mem; - ~Guard() { - g_config_value_.page_writer_max_point_num_ = pt; - g_config_value_.page_writer_max_memory_bytes_ = mem; - } - } guard{prev_pt, prev_mem}; - g_config_value_.page_writer_max_point_num_ = 10000; - g_config_value_.page_writer_max_memory_bytes_ = 512; - - std::string device_name = "device_time_mem"; - std::vector mnames = {"s0", "s1"}; - std::vector schemas; - for (auto& n : mnames) { - schemas.push_back(new MeasurementSchema(n, INT64, PLAIN, UNCOMPRESSED)); - } - tsfile_writer_->register_aligned_timeseries(device_name, schemas); - - int row_num = 200; - for (int i = 0; i < row_num; ++i) { - TsRecord record(1622505600000 + i, device_name); - if (i % 20 == 0) { - record.add_point(mnames[0], static_cast(i)); - record.add_point(mnames[1], static_cast(i * 10)); - } else { - record.points_.emplace_back(DataPoint(mnames[0])); - record.points_.emplace_back(DataPoint(mnames[1])); - } - ASSERT_EQ(tsfile_writer_->write_record_aligned(record), E_OK); - } - ASSERT_EQ(tsfile_writer_->flush(), E_OK); - ASSERT_EQ(tsfile_writer_->close(), E_OK); - - std::vector select_list; - for (auto& n : mnames) { - select_list.emplace_back(device_name, n); - } - storage::QueryExpression* qe = - storage::QueryExpression::create(select_list, nullptr); - storage::TsFileReader reader; - ASSERT_EQ(reader.open(file_name_), E_OK); - storage::ResultSet* tmp_qds = nullptr; - ASSERT_EQ(reader.query(qe, tmp_qds), E_OK); - auto* qds = (QDSWithoutTimeGenerator*)tmp_qds; - - bool has_next = false; - int64_t cur_row = 0; - while (IS_SUCC(qds->next(has_next)) && has_next) { - auto* rec = qds->get_row_record(); - ASSERT_NE(rec, nullptr); - EXPECT_EQ(rec->get_timestamp(), 1622505600000 + cur_row); - if (cur_row % 20 == 0) { - EXPECT_EQ(field_to_string(rec->get_field(1)), - 
std::to_string(cur_row)); - EXPECT_EQ(field_to_string(rec->get_field(2)), - std::to_string(cur_row * 10)); - } - cur_row++; - } - EXPECT_EQ(cur_row, row_num); - reader.destroy_query_data_set(qds); - ASSERT_EQ(reader.close(), E_OK); -} - -// Case 3: a value page (STRING type, ~104 bytes/point with PLAIN encoding) -// seals by memory threshold before the time page (INT64, 8 bytes/point). -// With threshold=512, STRING value page seals at ~5 points while time page -// only has ~40 bytes. Sync must force time page and other value pages to seal. -TEST_F(TsFileWriterTest, AlignedSealSync_ValueMemoryFirst) { - uint32_t prev_pt = g_config_value_.page_writer_max_point_num_; - uint32_t prev_mem = g_config_value_.page_writer_max_memory_bytes_; - struct Guard { - uint32_t pt, mem; - ~Guard() { - g_config_value_.page_writer_max_point_num_ = pt; - g_config_value_.page_writer_max_memory_bytes_ = mem; - } - } guard{prev_pt, prev_mem}; - g_config_value_.page_writer_max_point_num_ = 10000; - g_config_value_.page_writer_max_memory_bytes_ = 512; - - std::string device_name = "device_val_mem"; - std::vector schemas; - schemas.push_back(new MeasurementSchema("s0", INT64, PLAIN, UNCOMPRESSED)); - schemas.push_back(new MeasurementSchema("s1", STRING, PLAIN, UNCOMPRESSED)); - tsfile_writer_->register_aligned_timeseries(device_name, schemas); - - char* long_buf = new char[101]; - memset(long_buf, 'A', 100); - long_buf[100] = '\0'; - common::String str_val(long_buf, 100); - - int row_num = 100; - for (int i = 0; i < row_num; ++i) { - TsRecord record(1622505600000 + i, device_name); - record.add_point(std::string("s0"), static_cast(i)); - record.add_point(std::string("s1"), str_val); - ASSERT_EQ(tsfile_writer_->write_record_aligned(record), E_OK); - } - delete[] long_buf; - ASSERT_EQ(tsfile_writer_->flush(), E_OK); - ASSERT_EQ(tsfile_writer_->close(), E_OK); - - std::string s0("s0"), s1("s1"); - std::vector select_list; - select_list.emplace_back(device_name, s0); - 
select_list.emplace_back(device_name, s1); - storage::QueryExpression* qe = - storage::QueryExpression::create(select_list, nullptr); - storage::TsFileReader reader; - ASSERT_EQ(reader.open(file_name_), E_OK); - storage::ResultSet* tmp_qds = nullptr; - ASSERT_EQ(reader.query(qe, tmp_qds), E_OK); - auto* qds = (QDSWithoutTimeGenerator*)tmp_qds; - - bool has_next = false; - int64_t cur_row = 0; - while (IS_SUCC(qds->next(has_next)) && has_next) { - auto* rec = qds->get_row_record(); - ASSERT_NE(rec, nullptr); - EXPECT_EQ(rec->get_timestamp(), 1622505600000 + cur_row); - EXPECT_EQ(field_to_string(rec->get_field(1)), std::to_string(cur_row)); - cur_row++; - } - EXPECT_EQ(cur_row, row_num); - reader.destroy_query_data_set(qds); - ASSERT_EQ(reader.close(), E_OK); -} - TEST_F(TsFileWriterTest, WriteAlignedMultiFlush) { int measurement_num = 100, row_num = 100; std::string device_name = "device"; @@ -1229,4 +994,4 @@ TEST_F(TsFileWriterTest, WriteTabletDataTypeMismatch) { ASSERT_EQ(E_TYPE_NOT_MATCH, tsfile_writer_->write_tablet_aligned(tablet)); ASSERT_EQ(tsfile_writer_->flush(), E_OK); ASSERT_EQ(tsfile_writer_->close(), E_OK); -} \ No newline at end of file +}