From 7bb949806449195d96d529095d148f3f7a1d86d7 Mon Sep 17 00:00:00 2001 From: xiaoheng Date: Tue, 17 Mar 2026 21:48:26 +0800 Subject: [PATCH 1/4] feat: support rangebitmap read and write --- src/paimon/CMakeLists.txt | 1 + src/paimon/common/file_index/CMakeLists.txt | 5 +- .../rangebitmap/dictionary/key_factory.h | 2 +- .../file_index/rangebitmap/range_bitmap.cpp | 310 ++++++++++ .../file_index/rangebitmap/range_bitmap.h | 111 ++++ .../rangebitmap/range_bitmap_file_index.cpp | 244 ++++++++ .../rangebitmap/range_bitmap_file_index.h | 111 ++++ .../range_bitmap_file_index_factory.cpp | 32 + .../range_bitmap_file_index_factory.h | 41 ++ .../range_bitmap_file_index_test.cpp | 567 ++++++++++++++++++ test/inte/read_inte_with_index_test.cpp | 379 ++++++++++++ .../append_with_rangebitmap/README | 16 + ...5759403d-17fc-4031-b5bb-5e22b02fdb3b-0.orc | Bin 0 -> 1024 bytes ...3d-17fc-4031-b5bb-5e22b02fdb3b-0.orc.index | Bin 0 -> 1288 bytes ...est-642b5c6e-a8d6-46d0-bd3b-686478c89f6b-0 | Bin 0 -> 2175 bytes ...ist-64353b80-fb7c-470e-972a-07d0717af717-0 | Bin 0 -> 1006 bytes ...ist-64353b80-fb7c-470e-972a-07d0717af717-1 | Bin 0 -> 1113 bytes .../append_with_rangebitmap/schema/schema-0 | 39 ++ .../append_with_rangebitmap/snapshot/EARLIEST | 1 + .../append_with_rangebitmap/snapshot/LATEST | 1 + .../snapshot/snapshot-1 | 16 + .../README | 17 + ...b6b64e1e-e3d2-4f08-9a36-726a96cde1be-0.orc | Bin 0 -> 1024 bytes ...1e-e3d2-4f08-9a36-726a96cde1be-0.orc.index | Bin 0 -> 1413 bytes ...est-71437504-f8ad-4b7d-be04-28203480227d-0 | Bin 0 -> 2176 bytes ...ist-72b69947-13fc-4b20-8db2-ffa7d607b38f-0 | Bin 0 -> 1006 bytes ...ist-72b69947-13fc-4b20-8db2-ffa7d607b38f-1 | Bin 0 -> 1106 bytes .../schema/schema-0 | 44 ++ .../snapshot/EARLIEST | 1 + .../snapshot/LATEST | 1 + .../snapshot/snapshot-1 | 16 + .../append_with_rangebitmap/README | 16 + ...52e2-e4b5-4807-bf92-04401ed10560-0.parquet | Bin 0 -> 1397 bytes ...4b5-4807-bf92-04401ed10560-0.parquet.index | Bin 0 -> 1288 bytes 
...est-75f07296-b729-48db-aadd-17826f0aadf9-0 | Bin 0 -> 2180 bytes ...ist-28f50ecd-5d0e-4366-a13b-ca1f39bfac83-0 | Bin 0 -> 1006 bytes ...ist-28f50ecd-5d0e-4366-a13b-ca1f39bfac83-1 | Bin 0 -> 1108 bytes .../append_with_rangebitmap/schema/schema-0 | 38 ++ .../append_with_rangebitmap/snapshot/EARLIEST | 1 + .../append_with_rangebitmap/snapshot/LATEST | 1 + .../snapshot/snapshot-1 | 16 + .../README | 17 + ...3af1-afcb-4b84-b69a-ae472ba517f2-0.parquet | Bin 0 -> 1397 bytes ...fcb-4b84-b69a-ae472ba517f2-0.parquet.index | Bin 0 -> 1413 bytes ...est-90122746-0b4c-4328-8a04-576ee6b4cb83-0 | Bin 0 -> 2180 bytes ...ist-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-0 | Bin 0 -> 1006 bytes ...ist-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-1 | Bin 0 -> 1108 bytes .../schema/schema-0 | 43 ++ .../snapshot/EARLIEST | 1 + .../snapshot/LATEST | 1 + .../snapshot/snapshot-1 | 16 + 51 files changed, 2103 insertions(+), 2 deletions(-) create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap.cpp create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap.h create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.h create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.cpp create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.h create mode 100644 src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_test.cpp create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/README create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-5759403d-17fc-4031-b5bb-5e22b02fdb3b-0.orc create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-5759403d-17fc-4031-b5bb-5e22b02fdb3b-0.orc.index create mode 100644 
test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-642b5c6e-a8d6-46d0-bd3b-686478c89f6b-0 create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-list-64353b80-fb7c-470e-972a-07d0717af717-0 create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-list-64353b80-fb7c-470e-972a-07d0717af717-1 create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/schema/schema-0 create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/EARLIEST create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/LATEST create mode 100644 test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/snapshot-1 create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/README create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-b6b64e1e-e3d2-4f08-9a36-726a96cde1be-0.orc create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-b6b64e1e-e3d2-4f08-9a36-726a96cde1be-0.orc.index create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-71437504-f8ad-4b7d-be04-28203480227d-0 create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-72b69947-13fc-4b20-8db2-ffa7d607b38f-0 create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-72b69947-13fc-4b20-8db2-ffa7d607b38f-1 create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 create mode 100644 
test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST create mode 100644 test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/README create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet.index create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-75f07296-b729-48db-aadd-17826f0aadf9-0 create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-list-28f50ecd-5d0e-4366-a13b-ca1f39bfac83-0 create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-list-28f50ecd-5d0e-4366-a13b-ca1f39bfac83-1 create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/schema/schema-0 create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/EARLIEST create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/LATEST create mode 100644 test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/snapshot-1 create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/README create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet create 
mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet.index create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-90122746-0b4c-4328-8a04-576ee6b4cb83-0 create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-0 create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-1 create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST create mode 100644 test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index e4ff1138..b773f42d 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -382,6 +382,7 @@ if(PAIMON_BUILD_TESTS) common/file_index/bsi/bit_slice_index_roaring_bitmap_test.cpp common/file_index/rangebitmap/bit_slice_index_bitmap_test.cpp common/file_index/rangebitmap/dictionary/chunked_dictionary_test.cpp + common/file_index/rangebitmap/range_bitmap_file_index_test.cpp common/file_index/bloomfilter/bloom_filter_file_index_test.cpp common/file_index/bloomfilter/fast_hash_test.cpp common/global_index/complete_index_score_batch_reader_test.cpp diff --git a/src/paimon/common/file_index/CMakeLists.txt b/src/paimon/common/file_index/CMakeLists.txt index 
cff27cc9..9ac0804a 100644 --- a/src/paimon/common/file_index/CMakeLists.txt +++ b/src/paimon/common/file_index/CMakeLists.txt @@ -28,7 +28,10 @@ set(PAIMON_FILE_INDEX_SRC rangebitmap/dictionary/fixed_length_chunk.cpp rangebitmap/dictionary/key_factory.cpp rangebitmap/utils/literal_serialization_utils.cpp - rangebitmap/bit_slice_index_bitmap.cpp) + rangebitmap/bit_slice_index_bitmap.cpp + rangebitmap/range_bitmap.cpp + rangebitmap/range_bitmap_file_index.cpp + rangebitmap/range_bitmap_file_index_factory.cpp) add_paimon_lib(paimon_file_index SOURCES diff --git a/src/paimon/common/file_index/rangebitmap/dictionary/key_factory.h b/src/paimon/common/file_index/rangebitmap/dictionary/key_factory.h index 73e18cda..1b1167af 100644 --- a/src/paimon/common/file_index/rangebitmap/dictionary/key_factory.h +++ b/src/paimon/common/file_index/rangebitmap/dictionary/key_factory.h @@ -54,7 +54,7 @@ class KeyFactory : public std::enable_shared_from_this { static Result> Create(FieldType field_type); public: - static constexpr char DEFAULT_CHUNK_SIZE[] = "16kb"; + static constexpr char kDefaultChunkSize[] = "16kb"; }; class FixedLengthKeyFactory : public KeyFactory { diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp new file mode 100644 index 00000000..ee2acaea --- /dev/null +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp @@ -0,0 +1,310 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/file_index/rangebitmap/range_bitmap.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "fmt/format.h"
+#include "paimon/common/file_index/rangebitmap/dictionary/chunked_dictionary.h"
+#include "paimon/common/file_index/rangebitmap/dictionary/key_factory.h"
+#include "paimon/common/file_index/rangebitmap/utils/literal_serialization_utils.h"
+#include "paimon/common/io/data_output_stream.h"
+#include "paimon/common/io/memory_segment_output_stream.h"
+#include "paimon/common/memory/memory_segment_utils.h"
+#include "paimon/common/utils/field_type_utils.h"
+#include "paimon/io/data_input_stream.h"
+#include "paimon/memory/bytes.h"
+
+namespace paimon {
+
+/// Opens a serialized RangeBitmap that starts at `offset` in `input_stream`.
+///
+/// Only the header is decoded here; the dictionary and the bit-slice index are loaded lazily by
+/// GetDictionary() / GetBitSliceIndex().
+///
+/// Layout, as produced by Appender::Serialize():
+///   int32 header_length | int8 version | int32 rid | int32 cardinality |
+///   [min key | max key] (present only when cardinality > 0) | int32 dictionary_length |
+///   dictionary bytes | bit-slice index bytes
+///
+/// @return Status::Invalid for an unsupported version, for negative section lengths, or when a
+///         section offset does not fit the 32-bit offsets kept by RangeBitmap; otherwise any
+///         error raised while reading the stream.
+Result<std::unique_ptr<RangeBitmap>> RangeBitmap::Create(
+    const std::shared_ptr<InputStream>& input_stream, const int64_t offset,
+    const FieldType field_type, const std::shared_ptr<MemoryPool>& pool) {
+    PAIMON_RETURN_NOT_OK(input_stream->Seek(offset, SeekOrigin::FS_SEEK_SET));
+    const auto data_in = std::make_shared<DataInputStream>(input_stream);
+    PAIMON_ASSIGN_OR_RAISE(int32_t header_length, data_in->ReadValue<int32_t>());
+    PAIMON_ASSIGN_OR_RAISE(int8_t version, data_in->ReadValue<int8_t>());
+    if (version != kCurrentVersion) {
+        return Status::Invalid(fmt::format("RangeBitmap unsupported version: {}", version));
+    }
+    PAIMON_ASSIGN_OR_RAISE(int32_t rid, data_in->ReadValue<int32_t>());
+    PAIMON_ASSIGN_OR_RAISE(int32_t cardinality, data_in->ReadValue<int32_t>());
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<KeyFactory> shared_key_factory,
+                           KeyFactory::Create(field_type));
+    PAIMON_ASSIGN_OR_RAISE(LiteralSerDeUtils::Deserializer key_deserializer,
+                           LiteralSerDeUtils::CreateValueReader(field_type));
+    auto min = Literal{field_type};
+    auto max = Literal{field_type};
+    // The writer emits min/max only when at least one non-null key was appended.
+    if (cardinality > 0) {
+        PAIMON_ASSIGN_OR_RAISE(min, key_deserializer(data_in, pool.get()));
+        PAIMON_ASSIGN_OR_RAISE(max, key_deserializer(data_in, pool.get()));
+    }
+    PAIMON_ASSIGN_OR_RAISE(int32_t dictionary_length, data_in->ReadValue<int32_t>());
+    if (header_length < 0 || dictionary_length < 0) {
+        return Status::Invalid(
+            fmt::format("RangeBitmap corrupted header: header length {}, dictionary length {}",
+                        header_length, dictionary_length));
+    }
+    // Offsets are computed in 64 bits and validated before narrowing, so an index located
+    // beyond 2 GiB is rejected instead of silently wrapping to a bogus position.
+    constexpr int64_t kMaxOffset = std::numeric_limits<int32_t>::max();
+    if (offset < 0 || offset > kMaxOffset) {
+        return Status::Invalid(
+            fmt::format("RangeBitmap offset out of the supported 32-bit range: {}", offset));
+    }
+    // `header_length` does not count the leading int32 that stores it.
+    const int64_t dictionary_offset =
+        offset + static_cast<int64_t>(sizeof(int32_t)) + header_length;
+    const int64_t bsi_offset = dictionary_offset + dictionary_length;
+    if (bsi_offset > kMaxOffset) {
+        return Status::Invalid(fmt::format(
+            "RangeBitmap bit-slice index offset out of the supported 32-bit range: {}",
+            bsi_offset));
+    }
+    return std::unique_ptr<RangeBitmap>(new RangeBitmap(
+        rid, cardinality, static_cast<int32_t>(dictionary_offset),
+        static_cast<int32_t>(bsi_offset), min, max, shared_key_factory, input_stream, pool));
+}
+
+/// Complements `*out` in place: flips it over Flip(0, rid_) and then intersects the result with
+/// IsNotNull(), so rows holding null never appear in a negated result.
+Status RangeBitmap::Not(RoaringBitmap32* out) {
+    out->Flip(0, rid_);
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 is_not_null, this->IsNotNull());
+    *out &= is_not_null;
+    return Status::OK();
+}
+
+/// Rows whose value equals `key`.
+Result<RoaringBitmap32> RangeBitmap::Eq(const Literal& key) {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(int32_t min_compare, key.CompareTo(min_));
+    PAIMON_ASSIGN_OR_RAISE(int32_t max_compare, key.CompareTo(max_));
+    // A key outside [min, max] cannot match; answer from the header alone, before the
+    // bit-slice index is loaded from the stream.
+    if (min_compare < 0 || max_compare > 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap * bit_slice_ptr, this->GetBitSliceIndex());
+    // min == key == max: every non-null row holds this key, no dictionary lookup needed.
+    if (min_compare == 0 && max_compare == 0) {
+        return bit_slice_ptr->IsNotNull({});
+    }
+    PAIMON_ASSIGN_OR_RAISE(Dictionary * dictionary, this->GetDictionary());
+    PAIMON_ASSIGN_OR_RAISE(int32_t code, dictionary->Find(key));
+    // A negative code means the key is not in the dictionary.
+    if (code < 0) {
+        return RoaringBitmap32();
+    }
+    return bit_slice_ptr->Eq(code);
+}
+
+/// Non-null rows whose value differs from `key`.
+Result<RoaringBitmap32> RangeBitmap::Neq(const Literal& key) {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, Eq(key));
+    PAIMON_RETURN_NOT_OK(Not(&result));
+    return result;
+}
+
+/// Rows whose value is strictly less than `key`, computed as the complement of Gte(key).
+Result<RoaringBitmap32> RangeBitmap::Lt(const Literal& key) {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(int32_t min_compare, key.CompareTo(min_));
+    PAIMON_ASSIGN_OR_RAISE(int32_t max_compare, key.CompareTo(max_));
+    // key > max: every non-null row qualifies.
+    if (max_compare > 0) {
+        return IsNotNull();
+    }
+    // key <= min: nothing can be smaller.
+    if (min_compare <= 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, Gte(key));
+    PAIMON_RETURN_NOT_OK(Not(&result));
+    return result;
+}
+
+/// Rows whose value is less than or equal to `key` (union of Lt and Eq).
+Result<RoaringBitmap32> RangeBitmap::Lte(const Literal& key) {
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 lt_result, Lt(key));
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 eq_result, Eq(key));
+    lt_result |= eq_result;
+    return lt_result;
+}
+
+/// Rows whose value is strictly greater than `key`.
+Result<RoaringBitmap32> RangeBitmap::Gt(const Literal& key) {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    // key >= max: nothing can be greater.
+    PAIMON_ASSIGN_OR_RAISE(int32_t max_compare, key.CompareTo(max_));
+    if (max_compare >= 0) {
+        return RoaringBitmap32();
+    }
+    // key < min: every non-null row qualifies.
+    PAIMON_ASSIGN_OR_RAISE(int32_t min_compare, key.CompareTo(min_));
+    if (min_compare < 0) {
+        return IsNotNull();
+    }
+    PAIMON_ASSIGN_OR_RAISE(Dictionary * dictionary, this->GetDictionary());
+    PAIMON_ASSIGN_OR_RAISE(int32_t code, dictionary->Find(key));
+    PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap * bit_slice_ptr, this->GetBitSliceIndex());
+    if (code >= 0) {
+        return bit_slice_ptr->Gt(code);
+    }
+    // NOTE(review): a negative code is decoded here as -(insertion point) - 1, i.e. the code of
+    // the first dictionary key greater than `key` - confirm against Dictionary::Find.
+    return bit_slice_ptr->Gte(-code - 1);
+}
+
+/// Rows whose value is greater than or equal to `key` (union of Gt and Eq).
+Result<RoaringBitmap32> RangeBitmap::Gte(const Literal& key) {
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 gt_result, Gt(key));
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 eq_result, Eq(key));
+    gt_result |= eq_result;
+    return gt_result;
+}
+
+/// Rows whose value equals any of `keys` (union of Eq over every key).
+Result<RoaringBitmap32> RangeBitmap::In(const std::vector<Literal>& keys) {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    RoaringBitmap32 result{};
+    for (const auto& key : keys) {
+        PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 bitmap, Eq(key));
+        result |= bitmap;
+    }
+    return result;
+}
+
+/// Non-null rows whose value equals none of `keys`.
+Result<RoaringBitmap32> RangeBitmap::NotIn(const std::vector<Literal>& keys) {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, In(keys));
+    PAIMON_RETURN_NOT_OK(Not(&result));
+    return result;
+}
+
+/// Rows holding null.
+Result<RoaringBitmap32> RangeBitmap::IsNull() {
+    if (cardinality_ <= 0) {
+        // No non-null key was ever appended: every row (if any) is null.
+        if (rid_ > 0) {
+            RoaringBitmap32 result;
+            result.AddRange(0, rid_);
+            return result;
+        }
+        return RoaringBitmap32();
+    }
+
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 non_null_bitmap, IsNotNull());
+    non_null_bitmap.Flip(0, rid_);
+    return non_null_bitmap;
+}
+
+/// Rows holding a non-null value, as reported by the bit-slice index.
+Result<RoaringBitmap32> RangeBitmap::IsNotNull() {
+    if (cardinality_ <= 0) {
+        return RoaringBitmap32();
+    }
+    PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* const bit_slice_ptr, this->GetBitSliceIndex());
+    PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, bit_slice_ptr->IsNotNull({}));
+    return result;
+}
+
+/// Stores the decoded header; `bsi_` and `dictionary_` stay null until first use.
+RangeBitmap::RangeBitmap(const int32_t rid, const int32_t cardinality,
+                         const int32_t dictionary_offset, const int32_t bsi_offset,
+                         const Literal& min, const Literal& max,
+                         const std::shared_ptr<KeyFactory>& key_factory,
+                         const std::shared_ptr<InputStream>& input_stream,
+                         const std::shared_ptr<MemoryPool>& pool)
+    : pool_(pool),
+      rid_(rid),
+      cardinality_(cardinality),
+      bsi_offset_(bsi_offset),
+      dictionary_offset_(dictionary_offset),
+      min_(min),
+      max_(max),
+      key_factory_(key_factory),
+      input_stream_(input_stream),
+      bsi_(nullptr),
+      dictionary_(nullptr) {}
+
+/// Returns the bit-slice index, creating it from `input_stream_` at `bsi_offset_` on first call.
+/// The returned pointer is owned by this RangeBitmap.
+Result<BitSliceIndexBitmap*> RangeBitmap::GetBitSliceIndex() {
+    if (bsi_ == nullptr) {
+        PAIMON_ASSIGN_OR_RAISE(bsi_,
+                               BitSliceIndexBitmap::Create(input_stream_, bsi_offset_, pool_));
+    }
+    return bsi_.get();
+}
+
+/// Returns the key dictionary, creating it from `input_stream_` at `dictionary_offset_` on first
+/// call. The returned pointer is owned by this RangeBitmap.
+Result<Dictionary*> RangeBitmap::GetDictionary() {
+    if (dictionary_ == nullptr) {
+        PAIMON_ASSIGN_OR_RAISE(
+            dictionary_, ChunkedDictionary::Create(key_factory_->GetFieldType(), input_stream_,
+                                                   dictionary_offset_, pool_));
+    }
+    return dictionary_.get();
+}
+
+/// Builds an Appender whose dictionary is written with the given chunk size limit (in bytes).
+Result<std::unique_ptr<RangeBitmap::Appender>> RangeBitmap::Appender::Create(
+    const std::shared_ptr<KeyFactory>& factory, int64_t limited_serialized_size_in_bytes,
+    const std::shared_ptr<MemoryPool>& pool) {
+    return std::unique_ptr<Appender>(new Appender(factory, limited_serialized_size_in_bytes, pool));
+}
+
+RangeBitmap::Appender::Appender(const std::shared_ptr<KeyFactory>& factory,
+                                const int64_t limited_serialized_size_in_bytes,
+                                const std::shared_ptr<MemoryPool>& pool)
+    : pool_(pool),
+      rid_(0),
+      bitmaps_(LiteralComparator(factory)),
+      factory_(factory),
+      chunk_size_bytes_limit_(limited_serialized_size_in_bytes) {}
+
+/// Records `key` for the next row id. Null keys only advance the row counter, so they end up in
+/// no per-key bitmap.
+void RangeBitmap::Appender::Append(const Literal& key) {
+    if (!key.IsNull()) {
+        bitmaps_[key].Add(rid_);
+    }
+    rid_++;
+}
+
+/// Serializes everything appended so far into the layout read by RangeBitmap::Create().
+///
+/// Keys are visited in the order of `bitmaps_` and receive consecutive codes starting at 0; each
+/// (row id, code) pair is fed to the bit-slice index and each (key, code) pair to the dictionary.
+Result<PAIMON_UNIQUE_PTR<Bytes>> RangeBitmap::Appender::Serialize() const {
+    // Cast before subtracting so that an empty map yields -1 (clamped to 0) instead of relying
+    // on unsigned wrap-around of `size() - 1`.
+    const int32_t max_code = std::max<int32_t>(static_cast<int32_t>(bitmaps_.size()) - 1, 0);
+    PAIMON_ASSIGN_OR_RAISE(auto bsi, BitSliceIndexBitmap::Appender::Create(0, max_code, pool_));
+    PAIMON_ASSIGN_OR_RAISE(auto dictionary,
+                           ChunkedDictionary::Appender::Create(
+                               factory_, static_cast<int32_t>(chunk_size_bytes_limit_), pool_));
+    int32_t code = 0;
+    for (const auto& [key, bitmap] : bitmaps_) {
+        PAIMON_RETURN_NOT_OK(dictionary->AppendSorted(key, code));
+        for (auto it = bitmap.Begin(); it != bitmap.End(); ++it) {
+            PAIMON_RETURN_NOT_OK(bsi->Append(*it, code));
+        }
+        code++;
+    }
+    PAIMON_ASSIGN_OR_RAISE(LiteralSerDeUtils::Serializer serializer,
+                           LiteralSerDeUtils::CreateValueWriter(factory_->GetFieldType()));
+    Literal min{factory_->GetFieldType()};
+    Literal max{factory_->GetFieldType()};
+    if (!bitmaps_.empty()) {
+        min = bitmaps_.begin()->first;
+        max = bitmaps_.rbegin()->first;
+    }
+    // Only ask for the serialized size of literals that are actually written.
+    int32_t min_size = 0;
+    int32_t max_size = 0;
+    if (!min.IsNull()) {
+        PAIMON_ASSIGN_OR_RAISE(min_size, LiteralSerDeUtils::GetSerializedSizeInBytes(min));
+    }
+    if (!max.IsNull()) {
+        PAIMON_ASSIGN_OR_RAISE(max_size, LiteralSerDeUtils::GetSerializedSizeInBytes(max));
+    }
+    int32_t header_size = 0;
+    header_size += sizeof(int8_t);   // version
+    header_size += sizeof(int32_t);  // rid
+    header_size += sizeof(int32_t);  // cardinality
+    header_size += min_size;         // min literal size (0 when absent)
+    header_size += max_size;         // max literal size (0 when absent)
+    header_size += sizeof(int32_t);  // dictionary length
+    PAIMON_ASSIGN_OR_RAISE(PAIMON_UNIQUE_PTR<Bytes> dictionary_bytes, dictionary->Serialize());
+    auto dictionary_length = static_cast<int32_t>(dictionary_bytes->size());
+    PAIMON_ASSIGN_OR_RAISE(PAIMON_UNIQUE_PTR<Bytes> bsi_bytes, bsi->Serialize());
+    size_t bsi_length = bsi_bytes->size();
+    const auto data_output_stream = std::make_shared<MemorySegmentOutputStream>(
+        MemorySegmentOutputStream::DEFAULT_SEGMENT_SIZE, pool_);
+    data_output_stream->WriteValue<int32_t>(header_size);
+    data_output_stream->WriteValue<int8_t>(kCurrentVersion);
+    data_output_stream->WriteValue<int32_t>(rid_);
+    data_output_stream->WriteValue<int32_t>(static_cast<int32_t>(bitmaps_.size()));
+    if (!min.IsNull()) {
+        PAIMON_RETURN_NOT_OK(serializer(data_output_stream, min));
+    }
+    if (!max.IsNull()) {
+        PAIMON_RETURN_NOT_OK(serializer(data_output_stream, max));
+    }
+    data_output_stream->WriteValue<int32_t>(dictionary_length);
+    data_output_stream->Write(dictionary_bytes->data(), dictionary_length);
+    data_output_stream->Write(bsi_bytes->data(), bsi_length);
+    return MemorySegmentUtils::CopyToBytes(data_output_stream->Segments(), 0,
+                                           static_cast<int32_t>(data_output_stream->CurrentSize()),
+                                           pool_.get());
+}
+}  // namespace paimon
diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap.h b/src/paimon/common/file_index/rangebitmap/range_bitmap.h
new file mode 100644
index 00000000..311f84b7
--- /dev/null
+++ b/src/paimon/common/file_index/rangebitmap/range_bitmap.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2026-present Alibaba Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "paimon/common/file_index/rangebitmap/bit_slice_index_bitmap.h"
+#include "paimon/common/file_index/rangebitmap/dictionary/dictionary.h"
+#include "paimon/common/file_index/rangebitmap/dictionary/key_factory.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+#include "paimon/utils/roaring_bitmap32.h"
+
+namespace paimon {
+
+class InputStream;
+class MemoryPool;
+
+/// Reader side of the range-bitmap file index: answers comparison predicates on one column
+/// with a bitmap of matching row ids, using a key dictionary plus a bit-slice index that are
+/// both loaded lazily from the input stream.
+class RangeBitmap {
+ public:
+    /// Reads the header of a serialized range bitmap located at `offset` in `input_stream`.
+    static Result<std::unique_ptr<RangeBitmap>> Create(
+        const std::shared_ptr<InputStream>& input_stream, int64_t offset, FieldType field_type,
+        const std::shared_ptr<MemoryPool>& pool);
+
+    /// Predicate evaluation. Each call returns the row ids that satisfy the predicate; every
+    /// key-based predicate returns an empty bitmap when no non-null key was indexed.
+    Result<RoaringBitmap32> Eq(const Literal& key);
+    Result<RoaringBitmap32> Neq(const Literal& key);
+    Result<RoaringBitmap32> Lt(const Literal& key);
+    Result<RoaringBitmap32> Lte(const Literal& key);
+    Result<RoaringBitmap32> Gt(const Literal& key);
+    Result<RoaringBitmap32> Gte(const Literal& key);
+    Result<RoaringBitmap32> In(const std::vector<Literal>& keys);
+    Result<RoaringBitmap32> NotIn(const std::vector<Literal>& keys);
+    Result<RoaringBitmap32> IsNull();
+    Result<RoaringBitmap32> IsNotNull();
+
+ public:
+    /// Format version written by Appender::Serialize() and required by Create().
+    static constexpr int8_t kCurrentVersion = 1;
+
+ private:
+    /// Complements `*out` in place, restricted to non-null rows.
+    Status Not(RoaringBitmap32* out);
+    RangeBitmap(int32_t rid, int32_t cardinality, int32_t dictionary_offset, int32_t bsi_offset,
+                const Literal& min, const Literal& max,
+                const std::shared_ptr<KeyFactory>& key_factory,
+                const std::shared_ptr<InputStream>& input_stream,
+                const std::shared_ptr<MemoryPool>& pool);
+    /// Lazy accessors; the returned pointers are owned by this object.
+    Result<BitSliceIndexBitmap*> GetBitSliceIndex();
+    Result<Dictionary*> GetDictionary();
+
+ private:
+    std::shared_ptr<MemoryPool> pool_;
+    int32_t rid_;                // number of rows appended by the writer (nulls included)
+    int32_t cardinality_;        // number of distinct non-null keys
+    int32_t bsi_offset_;         // stream position of the bit-slice index
+    int32_t dictionary_offset_;  // stream position of the dictionary
+    Literal min_;
+    Literal max_;
+    std::shared_ptr<KeyFactory> key_factory_;
+    std::shared_ptr<InputStream> input_stream_;
+
+    // For lazy loading
+    std::unique_ptr<BitSliceIndexBitmap> bsi_;
+    std::unique_ptr<Dictionary> dictionary_;
+
+ public:
+    /// Writer side: collects one row-id bitmap per distinct non-null key and serializes them.
+    class Appender {
+     public:
+        static Result<std::unique_ptr<Appender>> Create(const std::shared_ptr<KeyFactory>& factory,
+                                                        int64_t limited_serialized_size_in_bytes,
+                                                        const std::shared_ptr<MemoryPool>& pool);
+        /// Appends the value of the next row; a null key only advances the row counter.
+        void Append(const Literal& key);
+        /// Serializes everything appended so far; does not modify the appender.
+        Result<PAIMON_UNIQUE_PTR<Bytes>> Serialize() const;
+
+     private:
+        Appender(const std::shared_ptr<KeyFactory>& factory,
+                 int64_t limited_serialized_size_in_bytes, const std::shared_ptr<MemoryPool>& pool);
+        /// Orders keys through KeyFactory::CompareLiteral.
+        /// NOTE(review): a failed comparison is reported as "not less" in both directions, so
+        /// std::map treats such keys as equivalent and merges their bitmaps silently - confirm
+        /// that CompareLiteral cannot fail for literals of the indexed field type.
+        struct LiteralComparator {
+            std::shared_ptr<KeyFactory> factory;
+
+            explicit LiteralComparator(std::shared_ptr<KeyFactory> f) : factory(std::move(f)) {}
+
+            bool operator()(const Literal& lhs, const Literal& rhs) const {
+                const auto result = factory->CompareLiteral(lhs, rhs);
+                return result.ok() && result.value() < 0;
+            }
+        };
+        std::shared_ptr<MemoryPool> pool_;
+        int32_t rid_;  // row id assigned to the next Append() call
+        std::map<Literal, RoaringBitmap32, LiteralComparator> bitmaps_;
+        std::shared_ptr<KeyFactory> factory_;
+        int64_t chunk_size_bytes_limit_;
+    };
+};
+
+}  // namespace paimon
diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp
new file mode 100644
index 00000000..8e04186c
--- /dev/null
+++ b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright 2026-present Alibaba Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "paimon/common/file_index/rangebitmap/range_bitmap_file_index.h" + +#include +#include + +#include "paimon/common/file_index/rangebitmap/range_bitmap.h" +#include "paimon/common/options/memory_size.h" +#include "paimon/common/predicate/literal_converter.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/file_index/bitmap_index_result.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" +#include "paimon/status.h" + +namespace paimon { + /* Keeps a copy of the indexer options; CreateWriter hands them to the writer. */ +RangeBitmapFileIndex::RangeBitmapFileIndex(const std::map& options) + : options_(options) {} + /* Imports the C-interface schema and builds a reader for the type of its only field. Returns Status::Invalid unless the schema has exactly one field; start, length, input_stream and pool are passed through to RangeBitmapFileIndexReader::Create. */ +Result> RangeBitmapFileIndex::CreateReader( + ::ArrowSchema* const arrow_schema, const int32_t start, const int32_t length, + const std::shared_ptr& input_stream, + const std::shared_ptr& pool) const { + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr arrow_schema_ptr, + arrow::ImportSchema(arrow_schema)); + if (arrow_schema_ptr->num_fields() != 1) { + return Status::Invalid( + "invalid schema for RangeBitmapFileIndexReader, supposed to have single field."); + } + const auto arrow_type = arrow_schema_ptr->field(0)->type(); + return RangeBitmapFileIndexReader::Create(arrow_type, start, length, input_stream, pool); +} + /* Imports the C-interface schema and builds a writer for its only field, using the options stored at construction. Returns Status::Invalid unless the schema has exactly one field. */ +Result> RangeBitmapFileIndex::CreateWriter( + ::ArrowSchema* arrow_schema, const std::shared_ptr& pool) const { + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr arrow_schema_ptr, + arrow::ImportSchema(arrow_schema)); + if (arrow_schema_ptr->num_fields() != 1) { + return Status::Invalid( + "invalid schema for RangeBitmapFileIndexWriter, supposed to have single field."); + } + const auto arrow_field = arrow_schema_ptr->field(0); + return RangeBitmapFileIndexWriter::Create(arrow_schema_ptr, arrow_field->name(), options_, + pool); +} + /* Builds a writer for the field named field_name in arrow_schema. Returns Status::Invalid when that field is absent. The chunk size starts from KeyFactory::kDefaultChunkSize and is replaced by the value stored under RangeBitmapFileIndex::kChunkSize when that key is present in options; both values are parsed with MemorySize::ParseBytes. */ +Result> RangeBitmapFileIndexWriter::Create( + const std::shared_ptr& arrow_schema, const std::string& field_name, + const std::map& options, const std::shared_ptr& pool) { + const 
auto field = arrow_schema->GetFieldByName(field_name); + if (!field) { + return Status::Invalid(fmt::format("Field not found in schema: {}", field_name)); + } + PAIMON_ASSIGN_OR_RAISE(FieldType field_type, + FieldTypeUtils::ConvertToFieldType(field->type()->id())); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr shared_key_factory, + KeyFactory::Create(field_type)); + PAIMON_ASSIGN_OR_RAISE(int64_t parsed_chunk_size, + MemorySize::ParseBytes(KeyFactory::kDefaultChunkSize)); /* default value; overwritten just below when the option is set */ + if (const auto chunk_size_it = options.find(RangeBitmapFileIndex::kChunkSize); + chunk_size_it != options.end()) { + PAIMON_ASSIGN_OR_RAISE(parsed_chunk_size, MemorySize::ParseBytes(chunk_size_it->second)); + } + auto struct_type = arrow::struct_({field}); /* single-field struct type; AddBatch imports incoming batches with it */ + PAIMON_ASSIGN_OR_RAISE(auto appender_ptr, RangeBitmap::Appender::Create( + shared_key_factory, parsed_chunk_size, pool)); + return std::make_shared( + struct_type, field->type(), options, pool, shared_key_factory, std::move(appender_ptr)); +} + /* Imports the C-interface batch as an array of struct_type_, converts its single child column to literals and appends every literal to the appender. Returns Status::Invalid when the imported array is not a struct array with exactly one field. */ +Status RangeBitmapFileIndexWriter::AddBatch(::ArrowArray* batch) { + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, + arrow::ImportArray(batch, struct_type_)); + auto struct_array = std::dynamic_pointer_cast(array); + if (!struct_array || struct_array->num_fields() != 1) { + return Status::Invalid( + "invalid batch for RangeBitmapFileIndexWriter, supposed to be struct array with single " + "field."); + } + PAIMON_ASSIGN_OR_RAISE(std::vector array_values, + LiteralConverter::ConvertLiteralsFromArray(*(struct_array->field(0)), + /*own_data=*/true)); + for (const auto& literal : array_values) { + appender_->Append(literal); /* NOTE(review): any result of Append is not inspected here - confirm that Append cannot fail */ + } + return Status::OK(); +} + /* Returns whatever the appender's Serialize() produces for the values added so far. */ +Result> RangeBitmapFileIndexWriter::SerializedBytes() const { + return appender_->Serialize(); +} + /* Stores the collaborators prepared by Create and takes ownership of the appender. */ +RangeBitmapFileIndexWriter::RangeBitmapFileIndexWriter( + const std::shared_ptr& struct_type, + const std::shared_ptr& arrow_type, + const std::map& options, const std::shared_ptr& pool, + const std::shared_ptr& key_factory, std::unique_ptr 
appender) + : struct_type_(struct_type), + arrow_type_(arrow_type), + options_(options), + pool_(pool), + key_factory_(key_factory), + appender_(std::move(appender)) {} + /* Converts the Arrow type id to a FieldType, creates the RangeBitmap from input_stream (passing start through) and wraps it in a reader held by shared_ptr. NOTE(review): length is accepted but not used anywhere in this function - confirm that is intended. */ +Result> RangeBitmapFileIndexReader::Create( + const std::shared_ptr& arrow_type, const int32_t start, const int32_t length, + const std::shared_ptr& input_stream, const std::shared_ptr& pool) { + PAIMON_ASSIGN_OR_RAISE(FieldType field_type, + FieldTypeUtils::ConvertToFieldType(arrow_type->id())); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr range_bitmap, + RangeBitmap::Create(input_stream, start, field_type, pool)); + return std::shared_ptr( + new RangeBitmapFileIndexReader(std::move(range_bitmap))); +} + /* Takes ownership of the bitmap that the Visit* methods query. */ +RangeBitmapFileIndexReader::RangeBitmapFileIndexReader(std::unique_ptr range_bitmap) + : range_bitmap_(std::move(range_bitmap)) {} + /* All Visit* methods in this file share one shape: a callable is handed to the result object built by std::make_shared. The callable captures shared_from_this(), so the reader is kept alive for as long as the callable exists, and captures its literal arguments by copy. When run it yields an empty RoaringBitmap32 if range_bitmap_ is null and otherwise forwards to the matching RangeBitmap query - Eq for this method. Presumably the result object runs the callable lazily; confirm against its definition. */ +Result> RangeBitmapFileIndexReader::VisitEqual( + const Literal& literal) { + return std::make_shared( + [self = shared_from_this(), literal]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->Eq(literal); + }); +} + /* Same shape as VisitEqual; forwards to RangeBitmap::Neq. */ +Result> RangeBitmapFileIndexReader::VisitNotEqual( + const Literal& literal) { + return std::make_shared( + [self = shared_from_this(), literal]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->Neq(literal); + }); +} + /* Same shape as VisitEqual; the whole literal vector is copied into the callable and passed to RangeBitmap::In. */ +Result> RangeBitmapFileIndexReader::VisitIn( + const std::vector& literals) { + return std::make_shared( + [self = shared_from_this(), literals]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->In(literals); + }); +} + /* Same shape as VisitIn; forwards to RangeBitmap::NotIn. */ +Result> RangeBitmapFileIndexReader::VisitNotIn( + const std::vector& literals) { + return std::make_shared( + [self = shared_from_this(), literals]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->NotIn(literals); + }); +} + /* Same shape as VisitEqual without a literal; forwards to RangeBitmap::IsNull. */ +Result> RangeBitmapFileIndexReader::VisitIsNull() { + return 
std::make_shared( + [self = shared_from_this()]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->IsNull(); + }); +} + /* Same shape as VisitIsNull; forwards to RangeBitmap::IsNotNull. */ +Result> RangeBitmapFileIndexReader::VisitIsNotNull() { + return std::make_shared( + [self = shared_from_this()]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->IsNotNull(); + }); +} + /* Same shape as VisitEqual; forwards to RangeBitmap::Gt. */ +Result> RangeBitmapFileIndexReader::VisitGreaterThan( + const Literal& literal) { + return std::make_shared( + [self = shared_from_this(), literal]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->Gt(literal); + }); +} + /* Same shape as VisitEqual; forwards to RangeBitmap::Lt. */ +Result> RangeBitmapFileIndexReader::VisitLessThan( + const Literal& literal) { + return std::make_shared( + [self = shared_from_this(), literal]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->Lt(literal); + }); +} + /* Same shape as VisitEqual; forwards to RangeBitmap::Gte. */ +Result> RangeBitmapFileIndexReader::VisitGreaterOrEqual( + const Literal& literal) { + return std::make_shared( + [self = shared_from_this(), literal]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->Gte(literal); + }); +} + /* Same shape as VisitEqual; forwards to RangeBitmap::Lte. */ +Result> RangeBitmapFileIndexReader::VisitLessOrEqual( + const Literal& literal) { + return std::make_shared( + [self = shared_from_this(), literal]() -> Result { + if (!self->range_bitmap_) { + return RoaringBitmap32(); + } + return self->range_bitmap_->Lte(literal); + }); +} + +} // namespace paimon diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.h b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.h new file mode 100644 index 00000000..c15e6ce6 --- /dev/null +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.h @@ -0,0 +1,111 @@ +/* + * Copyright 2026-present Alibaba Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "paimon/common/file_index/bitmap/bitmap_file_index.h" +#include "paimon/common/file_index/rangebitmap/range_bitmap.h" +#include "paimon/file_index/file_index_reader.h" +#include "paimon/file_index/file_index_writer.h" +#include "paimon/file_index/file_indexer.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" +#include "paimon/status.h" + +namespace paimon { + +class RangeBitmapFileIndexWriter; +class RangeBitmapFileIndexReader; + /* FileIndexer implementation for the range-bitmap index: creates the reader and the writer declared further down, each from an Arrow C-interface schema, and keeps the option map given at construction. */ +class RangeBitmapFileIndex final : public FileIndexer { + public: + explicit RangeBitmapFileIndex(const std::map& options); + + ~RangeBitmapFileIndex() override = default; + + Result> CreateReader( + ::ArrowSchema* arrow_schema, int32_t start, int32_t length, + const std::shared_ptr& input_stream, + const std::shared_ptr& pool) const override; + + Result> CreateWriter( + ::ArrowSchema* arrow_schema, const std::shared_ptr& pool) const override; + + public: + static constexpr char kChunkSize[] = "chunk-size"; /* option key for the chunk size */ + + private: + std::map options_; +}; + /* FileIndexWriter for the range-bitmap index: accepts batches through AddBatch and returns the serialized index from SerializedBytes. Instances are built through the static Create. */ +class RangeBitmapFileIndexWriter final : public FileIndexWriter { + public: + static Result> Create( + const std::shared_ptr& arrow_schema, const std::string& field_name, + const std::map& options, const std::shared_ptr& pool); + + Status AddBatch(::ArrowArray* batch) override; + Result> SerializedBytes() const override; + + RangeBitmapFileIndexWriter(const 
std::shared_ptr& struct_type, + const std::shared_ptr& arrow_type, + const std::map& options, + const std::shared_ptr& pool, + const std::shared_ptr& key_factory, + std::unique_ptr appender); + + private: + /// @note struct_type_ contains only one field with arrow_type_, used for import from C + /// interface. + std::shared_ptr struct_type_; + std::shared_ptr arrow_type_; + std::map options_; + std::shared_ptr pool_; + std::shared_ptr key_factory_; + std::unique_ptr appender_; +}; + /* FileIndexReader for the range-bitmap index. The constructor and all Visit* overrides are private: instances come from the static Create, and the visits are presumably reached through the FileIndexReader interface - confirm against that base class. The class also derives from std::enable_shared_from_this. */ +class RangeBitmapFileIndexReader final + : public FileIndexReader, + public std::enable_shared_from_this { + public: + static Result> Create( + const std::shared_ptr& arrow_type, int32_t start, int32_t length, + const std::shared_ptr& input_stream, const std::shared_ptr& pool); + + private: + explicit RangeBitmapFileIndexReader(std::unique_ptr range_bitmap); + + Result> VisitEqual(const Literal& literal) override; + Result> VisitNotEqual(const Literal& literal) override; + Result> VisitIn(const std::vector& literals) override; + Result> VisitNotIn( + const std::vector& literals) override; + Result> VisitIsNull() override; + Result> VisitIsNotNull() override; + Result> VisitGreaterThan(const Literal& literal) override; + Result> VisitLessThan(const Literal& literal) override; + Result> VisitGreaterOrEqual(const Literal& literal) override; + Result> VisitLessOrEqual(const Literal& literal) override; + + std::unique_ptr range_bitmap_; /* owned bitmap */ +}; + +} // namespace paimon diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.cpp new file mode 100644 index 00000000..c27736fd --- /dev/null +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.h" + +#include "paimon/common/file_index/rangebitmap/range_bitmap_file_index.h" + +namespace paimon { + /* Destructor defined here as defaulted, in the implementation file. */ +RangeBitmapFileIndexFactory::~RangeBitmapFileIndexFactory() = default; + /* Creates the indexer, constructed from the given options via std::make_unique. */ +Result> RangeBitmapFileIndexFactory::Create( + const std::map& options) const { + return std::make_unique(options); +} + /* Passes this factory to the project's REGISTER_PAIMON_FACTORY macro - presumably so it can be looked up by its Identifier(); confirm against the macro definition. */ +REGISTER_PAIMON_FACTORY(RangeBitmapFileIndexFactory); + +} // namespace paimon diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.h b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.h new file mode 100644 index 00000000..844bd5d2 --- /dev/null +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_factory.h @@ -0,0 +1,41 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include "paimon/file_index/file_indexer.h" +#include "paimon/file_index/file_indexer_factory.h" +#include "paimon/result.h" + +namespace paimon { + /* FileIndexerFactory for the range-bitmap file index; Create takes an option map and Identifier names the index type. */ +class PAIMON_EXPORT RangeBitmapFileIndexFactory final : public FileIndexerFactory { + public: + const char* Identifier() const override { /* fixed identifier string of this factory */ + return "range-bitmap"; + } + + ~RangeBitmapFileIndexFactory() override; /* declared only; no body in this header */ + + Result> Create( + const std::map& options) const override; +}; + +} // namespace paimon diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_test.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_test.cpp new file mode 100644 index 00000000..7a78fcc0 --- /dev/null +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index_test.cpp @@ -0,0 +1,567 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/common/file_index/rangebitmap/range_bitmap_file_index.h" + +#include + +#include +#include +#include + +#include "arrow/api.h" +#include "arrow/c/bridge.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/file_index/bitmap_index_result.h" +#include "paimon/file_index/file_index_format.h" +#include "paimon/file_index/file_indexer_factory.h" +#include "paimon/fs/file_system.h" +#include "paimon/fs/local/local_file_system.h" +#include "paimon/io/byte_array_input_stream.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/predicate/literal.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class RangeBitmapFileIndexTest : public ::testing::Test { + public: + void SetUp() override { + pool_ = GetDefaultPool(); + fs_ = std::make_shared(); + } + + void TearDown() override { + index_buffer_.reset(); + pool_.reset(); + fs_.reset(); + } + + static void CheckResult(const std::shared_ptr& result, + const std::vector& expected) { + const auto typed_result = std::dynamic_pointer_cast(result); + ASSERT_TRUE(typed_result); + ASSERT_OK_AND_ASSIGN(const RoaringBitmap32* bitmap, typed_result->GetBitmap()); + ASSERT_TRUE(bitmap); + const RoaringBitmap32 expected_bitmap = RoaringBitmap32::From(expected); + ASSERT_EQ(*bitmap, expected_bitmap) + << "result=" << bitmap->ToString() << ", expected=" << expected_bitmap.ToString(); + } + + // Helper function to create writer, serialize, and create reader + template + Result> CreateReaderForTest( + const std::shared_ptr& arrow_type, const std::vector& test_data, + PAIMON_UNIQUE_PTR* serialized_bytes_out) { + return CreateReaderForTest(arrow_type, test_data, {}, {}, + serialized_bytes_out); + } + + // Overload with NULL support - null_indices specifies which positions are NULL + template + Result> CreateReaderForTest( + const std::shared_ptr& arrow_type, const std::vector& test_data, + const std::set& null_indices, PAIMON_UNIQUE_PTR* serialized_bytes_out) { + 
return CreateReaderForTest(arrow_type, test_data, null_indices, {}, + serialized_bytes_out); + } + + // Overload with options to exercise writer configuration such as chunk size. + template + Result> CreateReaderForTest( + const std::shared_ptr& arrow_type, const std::vector& test_data, + const std::map& options, + PAIMON_UNIQUE_PTR* serialized_bytes_out) { + return CreateReaderForTest(arrow_type, test_data, {}, options, + serialized_bytes_out); + } + + // Full overload with NULL support and options + template + Result> CreateReaderForTest( + const std::shared_ptr& arrow_type, const std::vector& test_data, + const std::set& null_indices, const std::map& options, + PAIMON_UNIQUE_PTR* serialized_bytes_out); + + protected: + std::shared_ptr pool_; + + private: + std::shared_ptr fs_; + std::shared_ptr index_buffer_; +}; + +template +Result> RangeBitmapFileIndexTest::CreateReaderForTest( + const std::shared_ptr& arrow_type, const std::vector& test_data, + const std::set& null_indices, const std::map& options, + PAIMON_UNIQUE_PTR* serialized_bytes_out) { + // Create Arrow array from test data, replacing values at null_indices with NULL + auto builder = std::make_shared(); + for (size_t i = 0; i < test_data.size(); ++i) { + if (null_indices.count(static_cast(i)) > 0) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(builder->AppendNull()); + } else { + PAIMON_RETURN_NOT_OK_FROM_ARROW(builder->Append(test_data[i])); + } + } + std::shared_ptr arrow_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(builder->Finish(&arrow_array)); + // Wrap in StructArray (single field) as required by RangeBitmapFileIndexWriter + arrow::FieldVector fields = {arrow::field("test_field", arrow_type)}; + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr struct_array, + arrow::StructArray::Make({arrow_array}, fields)); + auto c_array = std::make_unique<::ArrowArray>(); + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*struct_array, c_array.get())); + // Create schema for the field + const auto schema = 
arrow::schema({arrow::field("test_field", arrow_type)}); + // Create writer + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr writer, + RangeBitmapFileIndexWriter::Create(schema, "test_field", options, pool_)); + // Add the batch + PAIMON_RETURN_NOT_OK(writer->AddBatch(c_array.get())); + // Get serialized payload + PAIMON_ASSIGN_OR_RAISE(PAIMON_UNIQUE_PTR serialized_bytes, writer->SerializedBytes()); + if (!serialized_bytes || serialized_bytes->size() == 0) { + return Status::Invalid("Serialized bytes is empty"); + } + *serialized_bytes_out = std::move(serialized_bytes); + const auto input_stream = std::make_shared( + (*serialized_bytes_out)->data(), (*serialized_bytes_out)->size()); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, + RangeBitmapFileIndexReader::Create( + arrow_type, 0, static_cast((*serialized_bytes_out)->size()), + input_stream, pool_)); + return reader; +} + +// Test with all NULL values +TEST_F(RangeBitmapFileIndexTest, TestAllNullValues) { + constexpr int num_rows = 10; + std::vector test_data(num_rows, 0); // placeholders, all will be NULL + std::set null_indices; + for (int32_t i = 0; i < num_rows; ++i) { + null_indices.insert(i); + } + const auto& arrow_type = arrow::int32(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, null_indices, &serialized_bytes))); + + // Test IsNull - should return all positions + std::vector all_positions(num_rows); + std::iota(all_positions.begin(), all_positions.end(), 0); + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, all_positions); + + // Test IsNotNull - should return empty + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, {}); + + // Test other queries with all NULL data - should return empty + ASSERT_OK_AND_ASSIGN(auto eq_result, reader->VisitEqual(Literal(FieldType::INT, 42))); + CheckResult(eq_result, {}); + + 
ASSERT_OK_AND_ASSIGN(auto gt_result, reader->VisitGreaterThan(Literal(FieldType::INT, 0))); + CheckResult(gt_result, {}); + + ASSERT_OK_AND_ASSIGN(auto lt_result, reader->VisitLessThan(Literal(FieldType::INT, 100))); + CheckResult(lt_result, {}); +} + +// Test with empty data - no rows at all +TEST_F(RangeBitmapFileIndexTest, TestEmptyRangeBitmap) { + std::vector test_data; // empty + const auto& arrow_type = arrow::int32(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + + // All queries should return empty results for empty bitmap + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {}); + + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, {}); + + ASSERT_OK_AND_ASSIGN(auto eq_result, reader->VisitEqual(Literal(FieldType::INT, 42))); + CheckResult(eq_result, {}); + + ASSERT_OK_AND_ASSIGN(auto gt_result, reader->VisitGreaterThan(Literal(FieldType::INT, 0))); + CheckResult(gt_result, {}); + + ASSERT_OK_AND_ASSIGN(auto lt_result, reader->VisitLessThan(Literal(FieldType::INT, 100))); + CheckResult(lt_result, {}); + + ASSERT_OK_AND_ASSIGN(auto gte_result, reader->VisitGreaterOrEqual(Literal(FieldType::INT, 0))); + CheckResult(gte_result, {}); + + ASSERT_OK_AND_ASSIGN(auto lte_result, reader->VisitLessOrEqual(Literal(FieldType::INT, 100))); + CheckResult(lte_result, {}); +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexMultiChunk) { + // Use many distinct values and a very small chunk size to force multiple + // dictionary chunks when writing the range bitmap index. + std::vector test_data(100); + std::iota(test_data.begin(), test_data.end(), 0); + + const auto& arrow_type = arrow::int32(); + std::map options; + // Configure a very small chunk size in bytes so that the dictionary must + // be split into multiple chunks. 
+ options[RangeBitmapFileIndex::kChunkSize] = "86b"; + + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, options, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_0_result, reader->VisitEqual(Literal(static_cast(0)))); + CheckResult(eq_0_result, {0}); + + ASSERT_OK_AND_ASSIGN(auto eq_50_result, reader->VisitEqual(Literal(static_cast(50)))); + CheckResult(eq_50_result, {50}); + ASSERT_OK_AND_ASSIGN(auto eq_51_result, reader->VisitEqual(Literal(static_cast(51)))); + CheckResult(eq_51_result, {51}); + ASSERT_OK_AND_ASSIGN(auto eq_99_result, reader->VisitEqual(Literal(static_cast(99)))); + CheckResult(eq_99_result, {99}); + + ASSERT_OK_AND_ASSIGN(auto gt_49_result, + reader->VisitGreaterThan(Literal(static_cast(49)))); + // Positions 50..99 + std::vector expected_gt_49(50); + std::iota(expected_gt_49.begin(), expected_gt_49.end(), 50); + CheckResult(gt_49_result, expected_gt_49); + + ASSERT_OK_AND_ASSIGN(auto lt_10_result, + reader->VisitLessThan(Literal(static_cast(10)))); + // Positions 0..9 + std::vector expected_lt_10(10); + std::iota(expected_lt_10.begin(), expected_lt_10.end(), 0); + CheckResult(lt_10_result, expected_lt_10); + + // is_not_null should cover all rows. 
+ std::vector all_positions(100); + std::iota(all_positions.begin(), all_positions.end(), 0); + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +// test data mixed with NULLs +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexBigInt) { + // Data: 10, NULL, 10, 30, NULL, 40, 50 (NULLs at positions 1 and 4) + std::vector test_data = {10, 20, 10, 30, 20, 40, 50}; // 20 at pos 1,4 will be NULL + std::set null_indices = {1, 4}; + const auto& arrow_type = arrow::int64(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, null_indices, &serialized_bytes))); + + // Test equality queries (NULL positions excluded) + ASSERT_OK_AND_ASSIGN(auto eq_10_result, reader->VisitEqual(Literal(static_cast(10)))); + CheckResult(eq_10_result, {0, 2}); // positions 0 and 2 have value 10 + ASSERT_OK_AND_ASSIGN(auto eq_20_result, reader->VisitEqual(Literal(static_cast(20)))); + CheckResult(eq_20_result, {}); // no value 20 (positions 1 and 4 are NULL) + ASSERT_OK_AND_ASSIGN(auto eq_30_result, reader->VisitEqual(Literal(static_cast(30)))); + CheckResult(eq_30_result, {3}); // position 3 has value 30 + ASSERT_OK_AND_ASSIGN(auto eq_40_result, reader->VisitEqual(Literal(static_cast(40)))); + CheckResult(eq_40_result, {5}); // position 5 has value 40 + ASSERT_OK_AND_ASSIGN(auto eq_50_result, reader->VisitEqual(Literal(static_cast(50)))); + CheckResult(eq_50_result, {6}); // position 6 has value 50 + + // Test range queries (NULL positions excluded) + ASSERT_OK_AND_ASSIGN(auto gt_25_result, + reader->VisitGreaterThan(Literal(static_cast(25)))); + CheckResult(gt_25_result, {3, 5, 6}); // values > 25: 30, 40, 50 + + ASSERT_OK_AND_ASSIGN(auto lt_35_result, + reader->VisitLessThan(Literal(static_cast(35)))); + CheckResult(lt_35_result, {0, 2, 3}); // values < 35: 10, 10, 30 (NULLs excluded) + ASSERT_OK_AND_ASSIGN(auto gte_20_result, + 
reader->VisitGreaterOrEqual(Literal(static_cast(20)))); + CheckResult(gte_20_result, {3, 5, 6}); // values >= 20: 30, 40, 50 (NULLs excluded) + ASSERT_OK_AND_ASSIGN(auto lte_40_result, + reader->VisitLessOrEqual(Literal(static_cast(40)))); + CheckResult(lte_40_result, {0, 2, 3, 5}); // values <= 40: 10, 10, 30, 40 (NULLs excluded) + + // Test IN queries (NULL positions excluded) + std::vector in_values = {Literal(static_cast(10)), + Literal(static_cast(30))}; + ASSERT_OK_AND_ASSIGN(auto in_result, reader->VisitIn(in_values)); + CheckResult(in_result, {0, 2, 3}); // positions with values 10 or 30 + ASSERT_OK_AND_ASSIGN(auto not_in_result, reader->VisitNotIn(in_values)); + CheckResult(not_in_result, {5, 6}); // positions with values NOT 10 or 30 (NULLs excluded) + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {1, 4}); // positions 1 and 4 are NULL + std::vector not_null_positions = {0, 2, 3, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, not_null_positions); // non-NULL positions +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexInt) { + std::vector test_data = {10, 20, 10, 30, 20, 40, 50}; + const auto& arrow_type = arrow::int32(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + + // Test equality queries + ASSERT_OK_AND_ASSIGN(auto eq_10_result, reader->VisitEqual(Literal(static_cast(10)))); + CheckResult(eq_10_result, {0, 2}); + ASSERT_OK_AND_ASSIGN(auto eq_20_result, reader->VisitEqual(Literal(static_cast(20)))); + CheckResult(eq_20_result, {1, 4}); + ASSERT_OK_AND_ASSIGN(auto eq_30_result, reader->VisitEqual(Literal(static_cast(30)))); + CheckResult(eq_30_result, {3}); + + // Test range queries + ASSERT_OK_AND_ASSIGN(auto gt_25_result, + reader->VisitGreaterThan(Literal(static_cast(25)))); + CheckResult(gt_25_result, {3, 5, 6}); 
// values > 25: 30, 40, 50 + ASSERT_OK_AND_ASSIGN(auto lt_35_result, + reader->VisitLessThan(Literal(static_cast(35)))); + CheckResult(lt_35_result, {0, 1, 2, 3, 4}); // values < 35 + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); + ASSERT_OK_AND_ASSIGN(auto gte_20_result, + reader->VisitGreaterOrEqual(Literal(static_cast(20)))); + CheckResult(gte_20_result, {1, 3, 4, 5, 6}); + ASSERT_OK_AND_ASSIGN(auto lte_40_result, + reader->VisitLessOrEqual(Literal(static_cast(40)))); + CheckResult(lte_40_result, {0, 1, 2, 3, 4, 5}); + + // Test empty result cases for INT values that don't exist + ASSERT_OK_AND_ASSIGN(auto eq_nonexistent_int_result, + reader->VisitEqual(Literal(static_cast(25)))); + CheckResult(eq_nonexistent_int_result, {}); // 25 doesn't exist in data {10,20,30,40,50} + + ASSERT_OK_AND_ASSIGN(auto eq_out_of_range_high_int_result, + reader->VisitEqual(Literal(static_cast(100)))); + CheckResult(eq_out_of_range_high_int_result, {}); // Value above maximum (50) + + ASSERT_OK_AND_ASSIGN(auto eq_out_of_range_low_int_result, + reader->VisitEqual(Literal(static_cast(5)))); + CheckResult(eq_out_of_range_low_int_result, {}); // Value below minimum (10) + + // Test NotEqual operations + ASSERT_OK_AND_ASSIGN(auto ne_10_result, + reader->VisitNotEqual(Literal(static_cast(10)))); + CheckResult(ne_10_result, {1, 3, 4, 5, 6}); // All positions except {0, 2} where 10 appears + + ASSERT_OK_AND_ASSIGN(auto ne_nonexistent_result, + reader->VisitNotEqual(Literal(static_cast(99)))); + CheckResult(ne_nonexistent_result, {0, 1, 2, 3, 4, 5, 6}); // All positions (non-empty result) + + // Test NotIn operations + ASSERT_OK_AND_ASSIGN(auto not_in_single_result, + reader->VisitNotIn({Literal(static_cast(10))})); + CheckResult(not_in_single_result, {1, 3, 4, 5,
6}); // All positions except where 10 appears + + ASSERT_OK_AND_ASSIGN( + auto not_in_multiple_result, + reader->VisitNotIn({Literal(static_cast(10)), Literal(static_cast(20))})); + CheckResult(not_in_multiple_result, {3, 5, 6}); // Positions not containing 10 or 20 + + ASSERT_OK_AND_ASSIGN(auto not_in_nonexistent_result, + reader->VisitNotIn({Literal(static_cast(99))})); + CheckResult(not_in_nonexistent_result, + {0, 1, 2, 3, 4, 5, 6}); // All positions (non-empty result) + + // Test NotIn with empty result - all values are NOT IN the complete set + std::vector all_values = { + Literal(static_cast(10)), Literal(static_cast(20)), + Literal(static_cast(30)), Literal(static_cast(40)), + Literal(static_cast(50))}; + ASSERT_OK_AND_ASSIGN(auto not_in_all_result, reader->VisitNotIn(all_values)); + CheckResult(not_in_all_result, + {}); // Empty result - no positions left when excluding all existing values +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexSmallInt) { + std::vector test_data = {10, 20, 10, 30, 20, 40, 50}; + const auto& arrow_type = arrow::int16(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_10_result, reader->VisitEqual(Literal(static_cast(10)))); + CheckResult(eq_10_result, {0, 2}); + ASSERT_OK_AND_ASSIGN(auto eq_20_result, reader->VisitEqual(Literal(static_cast(20)))); + CheckResult(eq_20_result, {1, 4}); + ASSERT_OK_AND_ASSIGN(auto eq_30_result, reader->VisitEqual(Literal(static_cast(30)))); + CheckResult(eq_30_result, {3}); + ASSERT_OK_AND_ASSIGN(auto gt_25_result, + reader->VisitGreaterThan(Literal(static_cast(25)))); + CheckResult(gt_25_result, {3, 5, 6}); // values > 25: 30, 40, 50 + ASSERT_OK_AND_ASSIGN(auto lt_35_result, + reader->VisitLessThan(Literal(static_cast(35)))); + CheckResult(lt_35_result, {0, 1, 2, 3, 4}); // values < 35 + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); 
+ CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexTinyInt) { + std::vector test_data = {10, 20, 10, 30, 20, 40, 50}; + const auto& arrow_type = arrow::int8(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_10_result, reader->VisitEqual(Literal(static_cast(10)))); + CheckResult(eq_10_result, {0, 2}); + ASSERT_OK_AND_ASSIGN(auto eq_20_result, reader->VisitEqual(Literal(static_cast(20)))); + CheckResult(eq_20_result, {1, 4}); + ASSERT_OK_AND_ASSIGN(auto eq_30_result, reader->VisitEqual(Literal(static_cast(30)))); + CheckResult(eq_30_result, {3}); + ASSERT_OK_AND_ASSIGN(auto gt_25_result, + reader->VisitGreaterThan(Literal(static_cast(25)))); + CheckResult(gt_25_result, {3, 5, 6}); // values > 25: 30, 40, 50 + ASSERT_OK_AND_ASSIGN(auto lt_35_result, + reader->VisitLessThan(Literal(static_cast(35)))); + CheckResult(lt_35_result, {0, 1, 2, 3, 4}); // values < 35 + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexBoolean) { + std::vector test_data = {true, false, true, true, false, true, false}; + const auto& arrow_type = arrow::boolean(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_true_result, reader->VisitEqual(Literal(true))); + CheckResult(eq_true_result, {0, 2, 3, 5}); // positions with 
value true + ASSERT_OK_AND_ASSIGN(auto eq_false_result, reader->VisitEqual(Literal(false))); + CheckResult(eq_false_result, {1, 4, 6}); // positions with value false + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexFloat) { + std::vector test_data = {10.5f, 20.3f, 10.5f, 30.7f, 20.3f, 40.1f, 50.9f}; + const auto& arrow_type = arrow::float32(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_10_5_result, reader->VisitEqual(Literal(10.5f))); + CheckResult(eq_10_5_result, {0, 2}); // positions with value 10.5 + ASSERT_OK_AND_ASSIGN(auto eq_20_3_result, reader->VisitEqual(Literal(20.3f))); + CheckResult(eq_20_3_result, {1, 4}); // positions with value 20.3 + ASSERT_OK_AND_ASSIGN(auto eq_30_7_result, reader->VisitEqual(Literal(30.7f))); + CheckResult(eq_30_7_result, {3}); // position with value 30.7 + ASSERT_OK_AND_ASSIGN(auto gt_24_9_result, reader->VisitGreaterThan(Literal(24.9f))); + CheckResult(gt_24_9_result, {3, 5, 6}); // values > 24.9: 30.7, 40.1, 50.9 + ASSERT_OK_AND_ASSIGN(auto lt_35_result, reader->VisitLessThan(Literal(35.0f))); + CheckResult(lt_35_result, {0, 1, 2, 3, 4}); // values < 35.0 + + // Test empty result cases for float values that don't exist + ASSERT_OK_AND_ASSIGN(auto eq_nonexistent_float_result, reader->VisitEqual(Literal(25.0f))); + CheckResult(eq_nonexistent_float_result, {}); // 25.0 doesn't exist in data + + ASSERT_OK_AND_ASSIGN(auto eq_out_of_range_high_result, reader->VisitEqual(Literal(100.0f))); + CheckResult(eq_out_of_range_high_result, {}); // Value above maximum + + ASSERT_OK_AND_ASSIGN(auto
eq_out_of_range_low_result, reader->VisitEqual(Literal(5.0f))); + CheckResult(eq_out_of_range_low_result, {}); // Value below minimum + + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexDouble) { + std::vector test_data = {10.5, 20.3, 10.5, 30.7, 20.3, 40.1, 50.9}; + const auto& arrow_type = arrow::float64(); + PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_10_5_result, reader->VisitEqual(Literal(10.5))); + CheckResult(eq_10_5_result, {0, 2}); // positions with value 10.5 + ASSERT_OK_AND_ASSIGN(auto eq_20_3_result, reader->VisitEqual(Literal(20.3))); + CheckResult(eq_20_3_result, {1, 4}); // positions with value 20.3 + ASSERT_OK_AND_ASSIGN(auto eq_30_7_result, reader->VisitEqual(Literal(30.7))); + CheckResult(eq_30_7_result, {3}); // position with value 30.7 + ASSERT_OK_AND_ASSIGN(auto gt_24_9_result, reader->VisitGreaterThan(Literal(24.9))); + CheckResult(gt_24_9_result, {3, 5, 6}); // values > 24.9: 30.7, 40.1, 50.9 + ASSERT_OK_AND_ASSIGN(auto lt_35_result, reader->VisitLessThan(Literal(35.0))); + CheckResult(lt_35_result, {0, 1, 2, 3, 4}); // values < 35.0 + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +TEST_F(RangeBitmapFileIndexTest, TestWriteAndReadRangeBitmapIndexDate) { + std::vector test_data = {42432, 24649, 42432, 38001, 24649, 50000, 12000}; + const auto& arrow_type = arrow::date32(); +
PAIMON_UNIQUE_PTR serialized_bytes; + ASSERT_OK_AND_ASSIGN(auto reader, (CreateReaderForTest( + arrow_type, test_data, &serialized_bytes))); + ASSERT_OK_AND_ASSIGN(auto eq_42432_result, reader->VisitEqual(Literal(FieldType::DATE, 42432))); + CheckResult(eq_42432_result, {0, 2}); + ASSERT_OK_AND_ASSIGN(auto eq_24649_result, reader->VisitEqual(Literal(FieldType::DATE, 24649))); + CheckResult(eq_24649_result, {1, 4}); + ASSERT_OK_AND_ASSIGN(auto eq_38001_result, reader->VisitEqual(Literal(FieldType::DATE, 38001))); + CheckResult(eq_38001_result, {3}); + ASSERT_OK_AND_ASSIGN(auto gt_result, + reader->VisitGreaterOrEqual(Literal(FieldType::DATE, 30000))); + CheckResult(gt_result, {0, 2, 3, 5}); // 42432, 38001, 50000 + + ASSERT_OK_AND_ASSIGN(auto lt_result, reader->VisitLessThan(Literal(FieldType::DATE, 40000))); + CheckResult(lt_result, {1, 3, 4, 6}); // 24649, 38001, 12000 + + // Test empty result cases - values that don't exist in the data + ASSERT_OK_AND_ASSIGN(auto eq_nonexistent_low_result, + reader->VisitEqual(Literal(FieldType::DATE, 47432))); + CheckResult(eq_nonexistent_low_result, {}); + + ASSERT_OK_AND_ASSIGN(auto eq_nonexistent_mid_result, + reader->VisitEqual(Literal(FieldType::DATE, 30000))); + CheckResult(eq_nonexistent_mid_result, {}); // Value in middle range but doesn't exist + + ASSERT_OK_AND_ASSIGN(auto eq_nonexistent_high_result, + reader->VisitEqual(Literal(FieldType::DATE, 60000))); + CheckResult(eq_nonexistent_high_result, {}); // Value above maximum (50000) + + // Test range queries that should return empty results + ASSERT_OK_AND_ASSIGN(auto gt_all_result, + reader->VisitGreaterOrEqual(Literal(FieldType::DATE, 60000))); + CheckResult(gt_all_result, {}); // Greater than maximum should return empty + + ASSERT_OK_AND_ASSIGN(auto lt_all_result, + reader->VisitLessThan(Literal(FieldType::DATE, 10000))); + CheckResult(lt_all_result, {}); // Less than minimum should return empty + + ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull()); + 
CheckResult(is_null_result, {}); + std::vector all_positions = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_OK_AND_ASSIGN(auto is_not_null_result, reader->VisitIsNotNull()); + CheckResult(is_not_null_result, all_positions); +} + +} // namespace paimon::test diff --git a/test/inte/read_inte_with_index_test.cpp b/test/inte/read_inte_with_index_test.cpp index bd26f187..957d62b9 100644 --- a/test/inte/read_inte_with_index_test.cpp +++ b/test/inte/read_inte_with_index_test.cpp @@ -45,6 +45,7 @@ #include "paimon/data/timestamp.h" #include "paimon/defs.h" #include "paimon/factories/factory_creator.h" +#include "paimon/fs/local/local_file_system.h" #include "paimon/memory/bytes.h" #include "paimon/memory/memory_pool.h" #include "paimon/metrics.h" @@ -372,6 +373,295 @@ class ReadInteWithIndexTest : public testing::Test, } } + void CheckResultForRangeBitmap(const std::string& path, + const std::shared_ptr& arrow_data_type, + const std::shared_ptr& split) const { + { + // test with no predicate - return all 8 rows + std::shared_ptr expected_array; + auto array_status = arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, + { + R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"], +[0, 3, 200, 2.2, 22.22, 19725, "row1"] +])", + R"([ +[0, 5, 300, 3.3, 33.33, 19727, "row2"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"] +])", + R"([ +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, null, null, null, null, null, null] +])", + R"([ +[0, null, null, null, null, null, "null_row"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] +])"}, + &expected_array); + std::cout << array_status.message() << std::endl; + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, /*predicate=*/nullptr, expected_array); + } + { + // Test equal predicate: f0 = 17 -> row 0 + auto predicate = PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(17)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 
100, 1.1, 11.11, 19739, "row0"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test less than predicate: f0 < 10 -> rows 1,2,3,4 (values 3,5,7,9) + auto predicate = PredicateBuilder::LessThan(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(10)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 3, 200, 2.2, 22.22, 19725, "row1"], +[0, 5, 300, 3.3, 33.33, 19727, "row2"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test greater than predicate: f0 > 5 -> rows 0,3,4,7 (values 17,7,9,10) + auto predicate = PredicateBuilder::GreaterThan(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(5)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test is null predicate on f0 -> rows 5, 6 + auto predicate = + PredicateBuilder::IsNull(/*field_index=*/0, /*field_name=*/"f0", FieldType::INT); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, null, null, null, null, null, null], +[0, null, null, null, null, null, "null_row"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test is not null predicate on f0 -> rows 0,1,2,3,4,7 + auto predicate = + 
PredicateBuilder::IsNotNull(/*field_index=*/0, /*field_name=*/"f0", FieldType::INT); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"], +[0, 3, 200, 2.2, 22.22, 19725, "row1"], +[0, 5, 300, 3.3, 33.33, 19727, "row2"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test in predicate: f0 in (3, 7) -> rows 1, 3 + auto predicate = PredicateBuilder::In( + /*field_index=*/0, /*field_name=*/"f0", FieldType::INT, {Literal(3), Literal(7)}); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 3, 200, 2.2, 22.22, 19725, "row1"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test not in predicate: f0 not in (3, 7) -> rows 0,2,4,7 (excluding null rows 5,6) + auto predicate = PredicateBuilder::NotIn( + /*field_index=*/0, /*field_name=*/"f0", FieldType::INT, {Literal(3), Literal(7)}); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"], +[0, 5, 300, 3.3, 33.33, 19727, "row2"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] + ])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + + // Test f1 (BIGINT) predicates + { + // Test greater than predicate: f1 > 300 -> rows 3,4,7 (values 400,500,600) + auto predicate = PredicateBuilder::GreaterThan(/*field_index=*/1, /*field_name=*/"f1", + FieldType::BIGINT, Literal(300L)); + std::shared_ptr 
expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + + // Test f2 (FLOAT) predicates + { + // Test less than predicate: f2 < 4.0 -> rows 0,1,2 (values 1.1,2.2,3.3) + auto predicate = PredicateBuilder::LessThan(/*field_index=*/2, /*field_name=*/"f2", + FieldType::FLOAT, Literal(4.0f)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"], +[0, 3, 200, 2.2, 22.22, 19725, "row1"], +[0, 5, 300, 3.3, 33.33, 19727, "row2"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + // Test f4 (DATE) predicates + { + // Test less or equal predicate: f4 <= 19725 -> row 1 (value 19725) + auto predicate = + PredicateBuilder::LessOrEqual(/*field_index=*/4, /*field_name=*/"f4", + FieldType::DATE, Literal(FieldType::DATE, 19725)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 3, 200, 2.2, 22.22, 19725, "row1"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + + // Test f3 (DOUBLE) predicates + { + // Test greater or equal predicate: f3 >= 44.44 -> rows 3,4,7 (values 44.44,55.55,66.66) + auto predicate = PredicateBuilder::GreaterOrEqual( + /*field_index=*/3, /*field_name=*/"f3", FieldType::DOUBLE, Literal(44.44)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, 10, 600, 6.6,
66.66, 19732, "row7"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + + // Test BETWEEN predicate on f1 (BIGINT) + { + // Test f1 BETWEEN 200 AND 500 -> rows 1,2,3,4 (values 200,300,400,500) + auto predicate = + PredicateBuilder::Between(/*field_index=*/1, /*field_name=*/"f1", FieldType::BIGINT, + Literal(200L), Literal(500L)); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 3, 200, 2.2, 22.22, 19725, "row1"], +[0, 5, 300, 3.3, 33.33, 19727, "row2"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + + // Test IN predicate on f2 (FLOAT) + { + // Test f2 IN (1.1, 4.4, 6.6) -> rows 0,3,7 (values 1.1,4.4,6.6) + auto predicate = + PredicateBuilder::In(/*field_index=*/2, /*field_name=*/"f2", FieldType::FLOAT, + {Literal(1.1f), Literal(4.4f), Literal(6.6f)}); + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"], +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, predicate, expected_array); + } + { + // Test nested composite: (f0 = 3 OR f0 = 17) AND f1 < 200 + // (f0 = 3 OR f0 = 17): matches rows 0,1 + // f1 < 200: matches rows 0 (f1=100) + // Combined AND: matches rows 0 + auto predicate1 = PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(3)); + auto predicate2 = PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(17)); + ASSERT_OK_AND_ASSIGN(auto or_predicate, PredicateBuilder::Or({predicate1, predicate2})); + + auto predicate3 = 
PredicateBuilder::LessThan(/*field_index=*/1, /*field_name=*/"f1", + FieldType::BIGINT, Literal(200L)); + ASSERT_OK_AND_ASSIGN(auto and_predicate, + PredicateBuilder::And({or_predicate, predicate3})); + + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 17, 100, 1.1, 11.11, 19739, "row0"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, and_predicate, expected_array); + } + { + // Test AND predicate with mixed types: f0 > 5 AND f1 > 100 + // f0 > 5: matches rows 0,3,4,7 + // f1 > 100: matches rows 1,2,3,4,7 + // Combined AND: matches rows 3,4,7 + auto predicate1 = PredicateBuilder::GreaterThan(/*field_index=*/0, /*field_name=*/"f0", + FieldType::INT, Literal(5)); + auto predicate2 = PredicateBuilder::GreaterThan(/*field_index=*/1, /*field_name=*/"f1", + FieldType::BIGINT, Literal(100L)); + ASSERT_OK_AND_ASSIGN(auto and_predicate, + PredicateBuilder::And({predicate1, predicate2})); + + std::shared_ptr expected_array; + auto array_status = + arrow::ipc::internal::json::ChunkedArrayFromJSON(arrow_data_type, {R"([ +[0, 7, 400, 4.4, 44.44, 19729, "row3"], +[0, 9, 500, 5.5, 55.55, 19731, "row4"], +[0, 10, 600, 6.6, 66.66, 19732, "row7"] +])"}, + &expected_array); + ASSERT_TRUE(array_status.ok()); + CheckResult(path, {split}, and_predicate, expected_array); + } + } + void CheckResultForBsi(const std::string& path, const std::shared_ptr& arrow_data_type, const std::shared_ptr split) const { @@ -2072,6 +2362,95 @@ TEST_P(ReadInteWithIndexTest, TestWithIndexWithoutRegistered) { } } +TEST_P(ReadInteWithIndexTest, TestRangeBitmapIndex) { + auto [file_format, enable_prefetch] = GetParam(); + std::string path = + GetDataDir() + "/" + file_format + "/append_with_rangebitmap.db/append_with_rangebitmap/"; + std::string file_name; + if (file_format == "orc") { + file_name = "data-5759403d-17fc-4031-b5bb-5e22b02fdb3b-0.orc"; + } else if (file_format == "parquet") { +
file_name = "data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet"; + } + + std::vector read_fields = {SpecialFields::ValueKind(), + DataField(0, arrow::field("f0", arrow::int32())), + DataField(1, arrow::field("f1", arrow::int64())), + DataField(2, arrow::field("f2", arrow::float32())), + DataField(3, arrow::field("f3", arrow::float64())), + DataField(4, arrow::field("f4", arrow::date32())), + DataField(5, arrow::field("f5", arrow::utf8()))}; + std::shared_ptr arrow_data_type = + DataField::ConvertDataFieldsToArrowStructType(read_fields); + + auto data_file_meta = std::make_shared( + file_name, /*file_size=*/1288, + /*row_count=*/8, /*min_key=*/BinaryRow::EmptyRow(), + /*max_key=*/BinaryRow::EmptyRow(), /*key_stats=*/SimpleStats::EmptyStats(), + /*value_stats=*/SimpleStats::EmptyStats(), /*min_sequence_number=*/0, + /*max_sequence_number=*/7, /*schema_id=*/0, + /*level=*/0, + /*extra_files=*/ + std::vector>({file_name + ".index"}), + /*creation_time=*/Timestamp(0ll, 0), /*delete_row_count=*/0, + /*embedded_index=*/nullptr, FileSource::Append(), + /*value_stats_cols=*/std::nullopt, + /*external_path=*/std::nullopt, /*first_row_id=*/std::nullopt, /*write_cols=*/std::nullopt); + + DataSplitImpl::Builder builder(BinaryRow::EmptyRow(), /*bucket=*/0, + /*bucket_path=*/path + "bucket-0/", {data_file_meta}); + ASSERT_OK_AND_ASSIGN(auto split, + builder.WithSnapshot(1).IsStreaming(false).RawConvertible(true).Build()); + + // Run comprehensive range bitmap index tests + CheckResultForRangeBitmap(path, arrow_data_type, split); +} + +TEST_P(ReadInteWithIndexTest, TestRangeBitmapIndexMultiChunk) { + auto [file_format, enable_prefetch] = GetParam(); + std::string path = + GetDataDir() + "/" + file_format + + "/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/"; + std::string file_name; + if (file_format == "orc") { + file_name = "data-b6b64e1e-e3d2-4f08-9a36-726a96cde1be-0.orc"; + } else if (file_format == "parquet") { + file_name =
"data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet"; + } + + std::vector read_fields = {SpecialFields::ValueKind(), + DataField(0, arrow::field("f0", arrow::int32())), + DataField(1, arrow::field("f1", arrow::int64())), + DataField(2, arrow::field("f2", arrow::float32())), + DataField(3, arrow::field("f3", arrow::float64())), + DataField(4, arrow::field("f4", arrow::date32())), + DataField(5, arrow::field("f5", arrow::utf8()))}; + std::shared_ptr arrow_data_type = + DataField::ConvertDataFieldsToArrowStructType(read_fields); + + auto data_file_meta = std::make_shared( + file_name, /*file_size=*/1413, + /*row_count=*/8, /*min_key=*/BinaryRow::EmptyRow(), + /*max_key=*/BinaryRow::EmptyRow(), /*key_stats=*/SimpleStats::EmptyStats(), + /*value_stats=*/SimpleStats::EmptyStats(), /*min_sequence_number=*/0, + /*max_sequence_number=*/7, /*schema_id=*/0, + /*level=*/0, + /*extra_files=*/ + std::vector>({file_name + ".index"}), + /*creation_time=*/Timestamp(0ll, 0), /*delete_row_count=*/0, + /*embedded_index=*/nullptr, FileSource::Append(), + /*value_stats_cols=*/std::nullopt, + /*external_path=*/std::nullopt, /*first_row_id=*/std::nullopt, /*write_cols=*/std::nullopt); + + DataSplitImpl::Builder builder(BinaryRow::EmptyRow(), /*bucket=*/0, + /*bucket_path=*/path + "bucket-0/", {data_file_meta}); + ASSERT_OK_AND_ASSIGN(auto split, + builder.WithSnapshot(1).IsStreaming(false).RawConvertible(true).Build()); + + // Run range bitmap index tests with multi-chunk test data + CheckResultForRangeBitmap(path, arrow_data_type, split); +} + TEST_P(ReadInteWithIndexTest, TestWithIOException) { auto [file_format, enable_prefetch] = GetParam(); std::string path = GetDataDir() + "/" + file_format + diff --git a/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/README b/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/README new file mode 100644 index 00000000..52eb2755 --- /dev/null +++ 
b/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/README @@ -0,0 +1,16 @@ +f0:int (nullable), f1:long (nullable), f2:float (nullable), f3:double (nullable), f4:date (nullable), f5:string (nullable) +no partition key +no bucket key +bucket count: -1 +range-bitmap index: f0,f1,f2,f3,f4 + +Rows (snapshot-1): +Add: [17, 100L, 1.1f, 11.11, date('2024-01-17'), 'row0'] +Add: [3, 200L, 2.2f, 22.22, date('2024-01-03'), 'row1'] +Add: [5, 300L, 3.3f, 33.33, date('2024-01-05'), 'row2'] +Add: [7, 400L, 4.4f, 44.44, date('2024-01-07'), 'row3'] +Add: [9, 500L, 5.5f, 55.55, date('2024-01-09'), 'row4'] +Add: [null, null, null, null, null, null] +Add: [null, null, null, null, null, 'null_row'] +Add: [10, 600L, 6.6f, 66.66, date('2024-01-10'), 'row7'] +NoCompact \ No newline at end of file diff --git a/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-5759403d-17fc-4031-b5bb-5e22b02fdb3b-0.orc b/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-5759403d-17fc-4031-b5bb-5e22b02fdb3b-0.orc new file mode 100644 index 0000000000000000000000000000000000000000..fe2d4db19ca7ffda1fe7e110c3c30baaa73cb10f GIT binary patch literal 1024 zcmeYdau#G@;9?VE;ot~hFa|Qkx!4&XK!}HfO^A(yO+ZN^Er8JyC?|!G;|I!da-3ik z*uW|ALLq=L8Yr#J#Rk=>$iXHh%?aczm^t&KJ zv`}*pj!n?BD?5yE&wRz%g&AcmPD7 zECWLv>nyWbQnTb{Da}%wr2#VSw<7}s&}{}lYzV|gKx_=eCNP%++2-5~3`{I;j4gK< z7&Nx(|5ezd%gj*J$}ec(`zlslOH$S zW=e91yTCRfF+7sVlaYaIAtM(!ZEyjD9FZC#1h@pzQa}VBmomXUl-n}ZVcG;;D*|?1D`V& zYUyyl%eI~}<<(yIjmFR0zTZ=Ms62I5#rsSq<6jO_AMBgPr#JKTLDtE(CUsog!rKnl z>Rz{;=yPMfhgw^JN79qq!5!zN8h#%NZEMk_&_%XqtQg8#@c%!OlXKK7lMK zvG5`6*;@JR_-N+f zJ7y?Lx7R;Ig@+){4d;?De&C?Z@jS0FVdci3@ z(XdKwbrWpn2BHw}i1~+PD6JeHd3G2(ImjPAw3x(r>(rU^4myVy)_+6 z%pvoqV+YpeS1>OYo8j8k^NV^9_*10JkP+s_nnyEKS!C`8fntSwA#9a4b<~)lnpoQT zyc~N_+qbLLn-Bh5DKo4`Hg%_GsEW3VGF8;3)wdoiq*(PfbsXQrFegMfiat_i2rnWc Yie?CV4}jeZv2`J~Ojl`5NB`TyA2(TMPyhe` literal 0 HcmV?d00001 diff --git 
a/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-642b5c6e-a8d6-46d0-bd3b-686478c89f6b-0 b/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-642b5c6e-a8d6-46d0-bd3b-686478c89f6b-0 new file mode 100644 index 0000000000000000000000000000000000000000..854ee21bfff8f949e412df3669b98889678e1559 GIT binary patch literal 2175 zcmeZI%3@>@ODrqO*DFrWNX<>$CtIylQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3(&d-}QHmJM(W3h@j-~-QA$ZoODxSP zQL+N*tc|TjPK;nj!2JU;5^gm1Tmnf6@ep6awGRED0L5IeYhb9WpEEEuhx$6X1|iZI zW-0_GG2&GRJA1hLI>vhnLR!|}ip(=`GfG;mWO86TfZAZ3n!Xpl3a z)PZXUg)lzNNO=R83w;p54l@BBX1IbMpXtE(bPe(Y7SjQaAs(21gBGCpOmOoI3Jw8> zGE%yNxs7lThX;8ABZ*2L1C}bWwIbm9F*!daHCd<%m|XHw5{pt8ly1pHC^aU0`fJcy zcjBP$Ri+ip8e8@MGI+$YF&yI3G)(ZlEMXwgvZ!fGQoESagW9X%S2n8sll?pO%d!u% zZ#Ef=F1q_p)qKtT7j2j08kqxc2K|4p&wAqI(N7!0H|z?v5&O-y|E$oPWA=eN_!d?P zB<|qY$rB=`Y5r(|)5BeyzNc^0y%N8%a6!-l1s@9`w|~+$9$xF1Wj+|ozJLGyw`Je4 ztA+k8A(v*kPIMDAQ(QXP$-|8^?k3AYt;JgmmUHUztoAb4KU;u-XU&OX)kd2GLIwgx dVw^lpX^eRZCKFT*cxSNr7$@ZLr7)s<695o+)kXjS literal 0 HcmV?d00001 diff --git a/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-list-64353b80-fb7c-470e-972a-07d0717af717-0 b/test/test_data/orc/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-list-64353b80-fb7c-470e-972a-07d0717af717-0 new file mode 100644 index 0000000000000000000000000000000000000000..4786f9eaf474fc77099c83a27d8d3cb2c80bf9ae GIT binary patch literal 1006 zcmbVLO-sWt7*_D?LBy*N@HToAVYj+QET*lj?fe)eWNo%p+H|CuFh~Ext}6Zl{Z$@4 zdGl&B=UUgHbGN2>KJq+oVs&mdxAw>_Pn6W})0n(%kAWBsD5wKY{e%ag1{jGe>vTYT z`7_}?h2-CTstiaNCrp`&^(dp92rB8_kVZiY>h1Bj5^lk!ty^ZkIr9pc09ma7oahK* zQ#D`#Nd*sCuZptu>k=wT7BHr3U{%wghNY%m=q*KR5J6`J?77->bSIC1ZAWzq)j!I^ zxJq17kD;5mr6RX{+|EeQ^hbI_y<7ohx(MNh<(j2awgDSP5y<&{$4t zwhyKqd6`!O0qv4uBtTs*L@G|!r|Vk4SShB=k=VN4TJfX>nR?@DL@`>xq@ 
z#O7Qp&NwI=OTatRg2>TrxP%m(#o1^b<(i)=s3c#(IKGW^!+-{oHQi;sdyxjTp!Wdm zq2BjQPgp>%r+dq){|FDEb>cno*rtO9y5(1o8%{Va{%~*WA0L2ne9J=Jp6^snc?7c4 zBmpAtdHk3$JQ2~voTjo<^L;SOC@6ylAasaN6An6(Sq5dbS#I|L+B!0gzl19-z4g2A zp|=1nRAlqlBr2Cd8r_&HbS*f5)@HJsQiW>2_n{ie!tGtAtBJ1I{XcS0ZqHs_loJ{~ zJ}WrR2xdgtd2xYmrhCu6>bKLM->-t&bnE5i&W&)>^OIQTxqM3)S7*=QZW27&S%AyCGNKuJ@JgRjFSvT4M$iXHh%?aczm^t&KJ zv`}*pj!n?BD?5yE&wRz%g&AcmPD7 zECWLv>nyWbQnTb{Da}%wr2#VSw<7}s&}{}lYzV|gKx_=eCNP%++2-5~3`{I;j4gK< z7&Nx(|5ezd%gj*J$}ec(`zlslOH$S zW=e91yTCRfF+7sVlaYaIAtM(!ZEyjD9FZC#1h@pzQa}VBmomXUl-n}ZVcG;;D*|?1D`V& zYUyyl%eI~}<<(yIjmFR0zTZ=Ms62I5#rsSq<6jO_AMBgPr#JKTLDtE(CUsog!rKnl z>Rz{;=yPMfhgw^JN79qq!5!zN8h#%NZEMk5~m6k2h!j#xJkHj<>Vlaet|fU zNF4l#Ui|>iGc(&=cGE=Cq;K-fJNtfrbSk`Z`D0}9Q3xRca!)wbS^b?qt(+Vl%DalJ zib9sGmEfdT!{oH(--he7tr>+x?rdTd4y1p_V@4U<`r75&yZZF`GitJRTA8Z#8ExIV zCbq8f2)ph3is7HMQ=3|iwyq|Zx9S(;&ua5wwRZQ#Ut*K3ht{#S8;g}{-GqWNv30d% z(@ODrqO*DFrWNX<>$CtIylQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3(&d-}QHmJM(W3h@j-~-QA$ZoODxSP zQL+N*tc|TjPK;nj!2JU;5^gm1Tmnf6@ep6awGRED0L5IeYhb9WpEEEuhx$6X1|iZI zW-0_GG2&GRJA1hLI>vhnLR!|}ip(=`GfG;mWO86TfZAZ3n!Xpl3a z)PZXUg)lzNNO=R83w;p54l@BBX1IbMpXtE(bPe(Y7SjQaAs(21gBGCpOmOoI3Jw8> zGE%yNxs7lThX;8ABZ*2L1C}bWwIbm9F*!daHCd<%m|XHw5{pt8QhNl{!=xtf_^$oR z`mo5N5~fwm8e8@MGI*r2F&y&JG)(ZlEMcJHVS0gw*_($+IzB=2o+oG<& zn}kJ8Zr=9R-6dJXN={WOaqkH{QD0syBM)Zz_;I^i*L} zf#4*gf`vzR2(XKDU-!(2I`%Dm{{dds=}uo49Gd9x>HLSlBqP}e4f8)%)bGE4zR+Y- zU$sKS#ub-*qJq6drW8hPXi`(txEPVx`%1;I^I)f8Cd=pDoB|9yYfcoaHrgBzG7vBl c_deU|+-z>`kz1Z9so$qDdD|WXF&a=%2b}r|4?qnt5?9vg zfcWxf!g~tIzxz}fkT6b|G8OAlMmZ5w(z_vzf)v!-<8LM0LQPw<%zSg^6*2*`S^+rG zbc9W4paPN#9r;Cj)#Qh)H%XX))&QAjB@7$$=uz>Jj@ODrqO*DFrWNX<=r#aFFVQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3)jdHT4<`#Ji$B9s)D6lLb6W2y@Fj6zbClaHpxFVr{Q(Z$8pB_3pEFv2{n zDqUQCTtkRZ8{ile;u!+;LVR$DV+g`j0)ZDFkEsz$@c4TA#fLfigd#gPsj?)s7{&L# 
zjuAwt@(cCxiFfu7^+R%Hv{GJaPL2{VyrIFLSX7i)2@IOdlGI#KOhM(9z+n_y8>^#~ zlA4xSnp2`=1=3j?TZVXi*pSxc@~pshjv;mAP(_bnt$(L)r?yQFE3trY?1y5#(n)MTNm;*!L?l*FPG z29|uq>ZFgsdY3(>uPj~OG?{4;qsCVKzYHF(Obkga{NfX~=JqeJIlP?Hp=|q{Bi`$k z#b%wh-zc)}{qKFX;^Offb5vdT_!jTTsN{+anlkO2-f874PZqFwTrrwZEaJ$-aNvP4 Ig9y4E0MFQN3;+NC literal 0 HcmV?d00001 diff --git a/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 new file mode 100644 index 00000000..096f0300 --- /dev/null +++ b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 @@ -0,0 +1,44 @@ +{ + "version" : 3, + "id" : 0, + "fields" : [ { + "id" : 0, + "name" : "f0", + "type" : "INT" + }, { + "id" : 1, + "name" : "f1", + "type" : "BIGINT" + }, { + "id" : 2, + "name" : "f2", + "type" : "FLOAT" + }, { + "id" : 3, + "name" : "f3", + "type" : "DOUBLE" + }, { + "id" : 4, + "name" : "f4", + "type" : "DATE" + }, { + "id" : 5, + "name" : "f5", + "type" : "STRING" + } ], + "highestFieldId" : 5, + "partitionKeys" : [ ], + "primaryKeys" : [ ], + "options" : { + "file-index.range-bitmap.columns" : "f0,f1,f2,f3,f4", + "owner" : "xiaoheng", + "file-index.range-bitmap.f1.chunk-size" : "16B", + "file-index.range-bitmap.f2.chunk-size" : "16B", + "file-index.range-bitmap.f3.chunk-size" : "16B", + "file-index.in-manifest-threshold" : "1B", + "file-index.range-bitmap.f4.chunk-size" : "16B", + "file.format" : "orc", + "file-index.range-bitmap.f0.chunk-size" : "16B" + }, + "timeMillis" : 1772188734852 +} \ No newline at end of file diff --git a/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST new file mode 100644 index 00000000..56a6051c --- /dev/null +++ 
b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST new file mode 100644 index 00000000..56a6051c --- /dev/null +++ b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 new file mode 100644 index 00000000..1e8a9f72 --- /dev/null +++ b/test/test_data/orc/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 @@ -0,0 +1,16 @@ +{ + "version" : 3, + "id" : 1, + "schemaId" : 0, + "baseManifestList" : "manifest-list-72b69947-13fc-4b20-8db2-ffa7d607b38f-0", + "baseManifestListSize" : 1006, + "deltaManifestList" : "manifest-list-72b69947-13fc-4b20-8db2-ffa7d607b38f-1", + "deltaManifestListSize" : 1106, + "commitUser" : "162120aa-5242-438d-bb0e-96ee933b3313", + "commitIdentifier" : 9223372036854775807, + "commitKind" : "APPEND", + "timeMillis" : 1772188737678, + "totalRecordCount" : 8, + "deltaRecordCount" : 8, + "nextRowId" : 0 +} \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/README b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/README new file mode 100644 index 00000000..52eb2755 --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/README @@ -0,0 +1,16 @@ +f0:int (nullable), f1:long (nullable), f2:float (nullable), 
f3:double (nullable), f4:date (nullable), f5:string (nullable) +no partition key +no bucket key +bucket count: -1 +range-bitmap index: f0,f1,f2,f3,f4 + +Rows (snapshot-1): +Add: [17, 100L, 1.1f, 11.11, date('2024-01-17'), 'row0'] +Add: [3, 200L, 2.2f, 22.22, date('2024-01-03'), 'row1'] +Add: [5, 300L, 3.3f, 33.33, date('2024-01-05'), 'row2'] +Add: [7, 400L, 4.4f, 44.44, date('2024-01-07'), 'row3'] +Add: [9, 500L, 5.5f, 55.55, date('2024-01-09'), 'row4'] +Add: [null, null, null, null, null, null] +Add: [null, null, null, null, null, 'null_row'] +Add: [10, 600L, 6.6f, 66.66, date('2024-01-10'), 'row7'] +NoCompact \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet new file mode 100644 index 0000000000000000000000000000000000000000..19fabdcaf2da847eadff9b7f2b5a97b2fd7ea90a GIT binary patch literal 1397 zcmZXUO=uHQ5XWaXn{*RXnquE$7nhQQ4H2|vv-zkHbv-DhBE*k^2eGF4z)JdIYO9Ec zQY}I$#Y4eTDjvL);-QBsJ(UU~1=~^-wKp%~MFi1;Ad1f0&%``3yKm;@{r3N6W;c7{ zoJxRX$OZD~?&ka+g@n-c5fLEz?C>Ax{06`Sfah^DyP|o~8qoY`0W=N(ca+H$vN|=f z+f)bG&%ugGD8M=#XMdsu1Lilyp!bpVCWgygF^g zvlf|2dpS1$*H;I2r0qKc(1_NA){M3ntp&}LhQe->A73XQ`s)Z&929sXNgoBwOJ(q8 zOpR*^ECLLSl*>IBu;OV!5cD-}^rRo5ax*VW0xJsMSbYdDQ^5?21@zgFCDG1z)w00S zh`U(ks=}h38{B24C0o5Lu+!l#cF^iwwn$t;scE*vgRD@IjB}J}Q0OE_rtSGG@5bjm zWZ8~y32uDbOBPvNraF7nozBLgTwnj-K+kagYOy~@f*5V%CB78vMlFIdRl9j0qA`tk z3qS(KRO|NH|4p<7lcG{aMI>mfCEFkwP!tT%kgUk%2q^flr0By_BANtgh+d=PTtv*6 z88*{Rg{YBl)5GK88-UM%404|P38lY$q#YPcaA2wdo8#y-7n$TTZdD|wRETwI5w(?rNRoQ7kL0WK@}nb-h~JCr10+V2KSlG|iyRb99-D%z03(-3*e` zAxvz2v=;xH;b%lktee<`D|Smjv8m$V*hx#kw(PCw|Z*U;2 zwyUZ3SUB1{(pN5o6R~(+>ri8PEgdh!kECK+QSDIEX*HKG_LhqId|K@-mQv~gxr|Nq N-w)RD70==a_aEa1`>6l` literal 0 HcmV?d00001 diff --git 
a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet.index b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/bucket-0/data-2fb852e2-e4b5-4807-bf92-04401ed10560-0.parquet.index new file mode 100644 index 0000000000000000000000000000000000000000..18ea5e60dfcf54e54b3f8b2942f438dc5f5712ae GIT binary patch literal 1288 zcmc&zPb)-G6u_&_%XqtQg8#@c%!OlXKK7lMK zvG5`6*;@JR_-N+f zJ7y?Lx7R;Ig@+){4d;?De&C?Z@jS0FVdci3@ z(XdKwbrWpn2BHw}i1~+PD6JeHd3G2(ImjPAw3x(r>(rU^4myVy)_+6 z%pvoqV+YpeS1>OYo8j8k^NV^9_*10JkP+s_nnyEKS!C`8fntSwA#9a4b<~)lnpoQT zyc~N_+qbLLn-Bh5DKo4`Hg%_GsEW3VGF8;3)wdoiq*(PfbsXQrFegMfiat_i2rnWc Yie?CV4}jeZv2`J~Ojl`5NB`TyA2(TMPyhe` literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-75f07296-b729-48db-aadd-17826f0aadf9-0 b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/manifest/manifest-75f07296-b729-48db-aadd-17826f0aadf9-0 new file mode 100644 index 0000000000000000000000000000000000000000..c9299ed9ad03da89fe46695c7ec07c2c1946f320 GIT binary patch literal 2180 zcmeZI%3@>@ODrqO*DFrWNX<>$CtIylQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3(&d-}QHmJM(W3h@j-~-QA$ZoODxSP zQL+N*tc|TjPK;nj!2JU;5^gm1Tmnf6@ep6awGRED0L5IeYhb9WpEEEuhx$6X1|iZI zW-0_GG2&GRJA1hLI>vhnLR!|}ip(=`GfG;mWO86TfZAZ3n!Xpl3a z)PZXUg)lzNNO=R83w;p54l@BBX1IbMpXtE(bPe(Y7SjQaAs(21gBGCpOmOoI3Jw8> zGE%yNxs7lThX;8ABZ*2L1C}bWwIbm9F*!daHCd<%m|XHw5{pt8_|yMKO>$ATaL;7# z=SOP9n<+hM-!27#2ENQmOa8ylcWw84&XWB6LgvPfS9J&Vw=mEC7(UzEcY~6h zjDego)3g&NZ;vph7q1kV+I8&HY5N2Yrff&ADH$3i|Ku$koaP0y{g~hTy=?bg9sv)d zkm5jnpD%Zk7tAwR`6a1_&n#ue>UlkVhm=YuIC&*WmV`f>b+cHSOMt;5qWyUZQ)Po{ gf8R1mY`w@-99H`ydw-WBawqw|q*j#u;DF;QP1e_TN zVoNh&3CTH&(m@^N7}ph4oULFi&%~OpLmf*^r_@`E)FFcI2G|R&?HaC#fE`zJOVvNh 
z!?;ddQ;%s_xT7JjdfeWW)BHzzQ@h##WqAnUrtMjkQ+7c*j$@ODrqO*DFrWNX<=r#aFFVQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3)jdHT4<`#Ji$B9s)D6lLb6W2y@Fj6zbClaHpxFVr{Q(Z$8pB_3pEFv2{n zDqUQCTtkRZ8{ile;u!+;LVR$DV+g`j0)ZDFkEsz$@c4TA#fLfigd#gPsj?)s7{&L# zjuAwt@(cCxiFfu7^+R%Hv{GJaPL2{VyrIFLSX7i)2@IOdlGI#KOhM(9z+n_y8>^#~ zlA4xSnp2`=1=3j?TZVXi*pSxc@~pshjv;mAP(_bnt$(L)r?yQFE3trY?1y5#(n)MTNm;*!L?l*FPG zhNo{Ie~t<`09bt625z484XV(TuvV0U}9o8 L@W7Zs1l<+@HCS~J literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/schema/schema-0 b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/schema/schema-0 new file mode 100644 index 00000000..cdeb25d1 --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/schema/schema-0 @@ -0,0 +1,38 @@ +{ + "version" : 3, + "id" : 0, + "fields" : [ { + "id" : 0, + "name" : "f0", + "type" : "INT" + }, { + "id" : 1, + "name" : "f1", + "type" : "BIGINT" + }, { + "id" : 2, + "name" : "f2", + "type" : "FLOAT" + }, { + "id" : 3, + "name" : "f3", + "type" : "DOUBLE" + }, { + "id" : 4, + "name" : "f4", + "type" : "DATE" + }, { + "id" : 5, + "name" : "f5", + "type" : "STRING" + } ], + "highestFieldId" : 5, + "partitionKeys" : [ ], + "primaryKeys" : [ ], + "options" : { + "file-index.range-bitmap.columns" : "f0,f1,f2,f3,f4", + "owner" : "xiaoheng", + "file-index.in-manifest-threshold" : "1B" + }, + "timeMillis" : 1772163669686 +} \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/EARLIEST b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/EARLIEST new file mode 100644 index 00000000..56a6051c --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/EARLIEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git 
a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/LATEST b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/LATEST new file mode 100644 index 00000000..56a6051c --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/LATEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/snapshot-1 b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/snapshot-1 new file mode 100644 index 00000000..fee41027 --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap.db/append_with_rangebitmap/snapshot/snapshot-1 @@ -0,0 +1,16 @@ +{ + "version" : 3, + "id" : 1, + "schemaId" : 0, + "baseManifestList" : "manifest-list-28f50ecd-5d0e-4366-a13b-ca1f39bfac83-0", + "baseManifestListSize" : 1006, + "deltaManifestList" : "manifest-list-28f50ecd-5d0e-4366-a13b-ca1f39bfac83-1", + "deltaManifestListSize" : 1108, + "commitUser" : "95859ce1-495d-4176-8f68-f7fbd595554c", + "commitIdentifier" : 9223372036854775807, + "commitKind" : "APPEND", + "timeMillis" : 1772163672630, + "totalRecordCount" : 8, + "deltaRecordCount" : 8, + "nextRowId" : 0 +} \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/README b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/README new file mode 100644 index 00000000..f308976e --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/README @@ -0,0 +1,17 @@ +f0:int (nullable), f1:long (nullable), f2:float (nullable), f3:double (nullable), f4:date (nullable), f5:string (nullable) +no partition key +no bucket key +bucket count: -1 +range-bitmap index: f0,f1,f2,f3,f4 +range-bitmap index chunk-size: 16B + +Rows (snapshot-1): +Add: [17, 
100L, 1.1f, 11.11, date('2024-01-17'), 'row0'] +Add: [3, 200L, 2.2f, 22.22, date('2024-01-03'), 'row1'] +Add: [5, 300L, 3.3f, 33.33, date('2024-01-05'), 'row2'] +Add: [7, 400L, 4.4f, 44.44, date('2024-01-07'), 'row3'] +Add: [9, 500L, 5.5f, 55.55, date('2024-01-09'), 'row4'] +Add: [null, null, null, null, null, null] +Add: [null, null, null, null, null, 'null_row'] +Add: [10, 600L, 6.6f, 66.66, date('2024-01-10'), 'row7'] +NoCompact \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet new file mode 100644 index 0000000000000000000000000000000000000000..67d9aea94bfb25060e9bc55c537a899393cc25d7 GIT binary patch literal 1397 zcmZXUO=uHQ5XWaXn{*RXnquE$7nhQQ4H2|vv-zkHbv-DhBE*k^2eGF4z)JdIYO9Ec zQY}I$#Y4eTDjvL);-QBsJ(UU~1=~^-wKp%~MFi1;Ad1f0&%``3yKm;@{r3N6W;c7{ zoJxRX$OZD~?&ka+g@n-c5fLEz?C>Ax{06`Sfah^DyP|o~8qoY`0W=N(ca+H$vN|=f z+f)bG&%ugGD8M=#XMdsu1Lilyp!bpVCWgygF^g zvlf|2dpS1$*H;I2r0qKc(1_NA){M3ntp&}LhQe->A73XQ`s)Z&929sXNgoBwOJ(q8 zOpR*^ECLLSl*>IBu;OV!5cD-}^rRo5ax*VW0xJsMSbYdDQ^5?21@zgFCDG1z)w00S zh`U(ks=}h38{B24C0o5Lu+!l#cF^iwwn$t;scE*vgRD@IjB}J}Q0OE_rtSGG@5bjm zWZ8~y32uDbOBPvNraF7nozBLgTwnj-K+kagYOy~@f*5V%CB78vMlFIdRl9j0qA`tk z3qS(KRO|NH|4p<7lcG{aMI>mfCEFkwP!tT%kgUk%2q^flqzFElp-GU2=ruadMZ}Dm zVKd!Sh#L7eJv<)10r(8aAm_QCQ2NV9+JV6Y2c{aZIgU1+HMT=Zy+nY)8 zQKJhb(QeBs40aRHrvcJR$NVG(biqeM5s`lPwcrP@YbaN_K2p4KsJ{{(Emns61_#1w zyP9f`g`>SAedR(p5sT-w4mFn7((yw4NGhfk)ebeCR&)7cZ>gBir`6tKDWx8e%h*)^ N{a_tm@hpCD{{iCS`>6l` literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet.index 
b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/bucket-0/data-e4673af1-afcb-4b84-b69a-ae472ba517f2-0.parquet.index new file mode 100644 index 0000000000000000000000000000000000000000..343734e7502e925b676741559eeb0ac746da6502 GIT binary patch literal 1413 zcmc&!O)mpM7@pbgYS9K!;v*a!9E1dmNJY%aMYbU>5~m6k2h!j#xJkHj<>Vlaet|fU zNF4l#Ui|>iGc(&=cGE=Cq;K-fJNtfrbSk`Z`D0}9Q3xRca!)wbS^b?qt(+Vl%DalJ zib9sGmEfdT!{oH(--he7tr>+x?rdTd4y1p_V@4U<`r75&yZZF`GitJRTA8Z#8ExIV zCbq8f2)ph3is7HMQ=3|iwyq|Zx9S(;&ua5wwRZQ#Ut*K3ht{#S8;g}{-GqWNv30d% z(@ODrqO*DFrWNX<>$CtIylQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3(&d-}QHmJM(W3h@j-~-QA$ZoODxSP zQL+N*tc|TjPK;nj!2JU;5^gm1Tmnf6@ep6awGRED0L5IeYhb9WpEEEuhx$6X1|iZI zW-0_GG2&GRJA1hLI>vhnLR!|}ip(=`GfG;mWO86TfZAZ3n!Xpl3a z)PZXUg)lzNNO=R83w;p54l@BBX1IbMpXtE(bPe(Y7SjQaAs(21gBGCpOmOoI3Jw8> zGE%yNxs7lThX;8ABZ*2L1C}bWwIbm9F*!daHCd<%m|XHw5{pt8W-s+871Uil3>0u{T_oxf`n{R$X>ZOH=<10SRToBrcS%l z^jT42i`aCTRz?21*A|99V7R2>$@u^EYgc!Tr7bS(N4=X3eg8Ivd$NZ-uAhI_XM>8J zoQ0ek(=`rP&7+JjHqR8D+2j5>y#9b7%QYv{Mxh{vpY0Dg1b^-_Iv}6BX7kOQw1$F9 zo%4Ej&gonj`((3;&K?`_?Q5CY%)%Y7FKE4!HqqO*r#siW`eep75dj8^i1z0tOqC6) h2?{AXLJ}-zneH6OQ1DKWGT@!D?7$6~6HMrS1pp>T(Wd|a literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-0 b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-0 new file mode 100644 index 0000000000000000000000000000000000000000..808abfa58069a65940b5fc34233876327d3e0a47 GIT binary patch literal 1006 zcmbVL%}#?b9G{72561WaJuf@}H(a|&0YT`TC z;v6^@ox4%``{?iYZ|&**(eXKXV3|?`3!0GklNI1)PJs#-4KfyjK1fN@Sr<78+^Cm>zMF%W&P>nDtnQdARhn#f75_d(r} zU-^9y(ivIC9H^36`Bi&Z&fpT68rI=2<;qAG-rx$k8_>o@4!>j3x(qVt!Cc{MpfNHI 
zvptk5Tu1gzyOK@XuTHlc-HQ7^vR|FmuP(?!8k{~BoTMRPVebRNXFJS$uP;w;L)9-o E0X5o06#xJL literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-1 b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/manifest/manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-1 new file mode 100644 index 0000000000000000000000000000000000000000..d367884cb55c083d4c00227d38c28ec8d12cc7d8 GIT binary patch literal 1108 zcmeZI%3@>@ODrqO*DFrWNX<=r#aFFVQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3)jdHT4<`#Ji$B9s)D6lLb6W2y@Fj6zbClaHpxFVr{Q(Z$8pB_3pEFv2{n zDqUQCTtkRZ8{ile;u!+;LVR$DV+g`j0)ZDFkEsz$@c4TA#fLfigd#gPsj?)s7{&L# zjuAwt@(cCxiFfu7^+R%Hv{GJaPL2{VyrIFLSX7i)2@IOdlGI#KOhM(9z+n_y8>^#~ zlA4xSnp2`=1=3j?TZVXi*pSxc@~pshjv;mAP(_bnt$(L)r?yQFE3trY?1y5#(n)MTNm;*!L?l*FPG zhE0#p|M2m-t8}xg%F5}@$2O*Aj2c_@|1x-lGBG5v2uMsgYFXb|a;$yEtY7mlxFl_o zUO(a6W?xm-+}P`-uV>AQ;&^Z|OE0=gU-MCCPOa>e@|zztuUayTvFp5d5Sb?7$i#5q KfiZ&!x-9?)#&eti literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 new file mode 100644 index 00000000..d4ca2df4 --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/schema/schema-0 @@ -0,0 +1,43 @@ +{ + "version" : 3, + "id" : 0, + "fields" : [ { + "id" : 0, + "name" : "f0", + "type" : "INT" + }, { + "id" : 1, + "name" : "f1", + "type" : "BIGINT" + }, { + "id" : 2, + "name" : "f2", + "type" : "FLOAT" + }, { + "id" : 3, + "name" : "f3", + "type" : "DOUBLE" + }, { + "id" : 4, + "name" : "f4", + "type" : "DATE" + }, { + "id" : 5, + "name" : "f5", + "type" : "STRING" + } ], + 
"highestFieldId" : 5, + "partitionKeys" : [ ], + "primaryKeys" : [ ], + "options" : { + "file-index.range-bitmap.columns" : "f0,f1,f2,f3,f4", + "owner" : "xiaoheng", + "file-index.range-bitmap.f1.chunk-size" : "16B", + "file-index.range-bitmap.f2.chunk-size" : "16B", + "file-index.range-bitmap.f3.chunk-size" : "16B", + "file-index.in-manifest-threshold" : "1B", + "file-index.range-bitmap.f4.chunk-size" : "16B", + "file-index.range-bitmap.f0.chunk-size" : "16B" + }, + "timeMillis" : 1772188209180 +} \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST new file mode 100644 index 00000000..56a6051c --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/EARLIEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST new file mode 100644 index 00000000..56a6051c --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/LATEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 new file mode 100644 index 00000000..4e78d6b5 --- /dev/null +++ b/test/test_data/parquet/append_with_rangebitmap_multi_chunk.db/append_with_rangebitmap_multi_chunk/snapshot/snapshot-1 @@ -0,0 +1,16 @@ +{ + "version" : 3, + "id" : 1, + "schemaId" : 0, + "baseManifestList" : 
"manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-0", + "baseManifestListSize" : 1006, + "deltaManifestList" : "manifest-list-a63d920d-a1ef-444b-a1cf-97b5df41f6e8-1", + "deltaManifestListSize" : 1108, + "commitUser" : "9385bcac-276c-4639-b825-52623beb2a6d", + "commitIdentifier" : 9223372036854775807, + "commitKind" : "APPEND", + "timeMillis" : 1772188213862, + "totalRecordCount" : 8, + "deltaRecordCount" : 8, + "nextRowId" : 0 +} \ No newline at end of file From 892d811629c2955a962643c10d02ce6c1d647983 Mon Sep 17 00:00:00 2001 From: xiaoheng Date: Fri, 20 Mar 2026 10:28:48 +0800 Subject: [PATCH 2/4] feat: support rangebitmap read and write --- .../file_index/rangebitmap/range_bitmap.cpp | 2 +- .../rangebitmap/range_bitmap_file_index.cpp | 30 ------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp index ee2acaea..38afa603 100644 --- a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp @@ -190,7 +190,7 @@ Result RangeBitmap::IsNotNull() { if (cardinality_ <= 0) { return RoaringBitmap32(); } - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* const bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap * bit_slice_ptr, this->GetBitSliceIndex()); PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, bit_slice_ptr->IsNotNull({})); return result; } diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp index 8e04186c..345d7b4f 100644 --- a/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap_file_index.cpp @@ -137,9 +137,6 @@ Result> RangeBitmapFileIndexReader::VisitEqual( const Literal& literal) { return std::make_shared( [self = shared_from_this(), literal]() -> 
Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->Eq(literal); }); } @@ -148,9 +145,6 @@ Result> RangeBitmapFileIndexReader::VisitNotEqu const Literal& literal) { return std::make_shared( [self = shared_from_this(), literal]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->Neq(literal); }); } @@ -159,9 +153,6 @@ Result> RangeBitmapFileIndexReader::VisitIn( const std::vector& literals) { return std::make_shared( [self = shared_from_this(), literals]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->In(literals); }); } @@ -170,9 +161,6 @@ Result> RangeBitmapFileIndexReader::VisitNotIn( const std::vector& literals) { return std::make_shared( [self = shared_from_this(), literals]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->NotIn(literals); }); } @@ -180,9 +168,6 @@ Result> RangeBitmapFileIndexReader::VisitNotIn( Result> RangeBitmapFileIndexReader::VisitIsNull() { return std::make_shared( [self = shared_from_this()]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->IsNull(); }); } @@ -190,9 +175,6 @@ Result> RangeBitmapFileIndexReader::VisitIsNull Result> RangeBitmapFileIndexReader::VisitIsNotNull() { return std::make_shared( [self = shared_from_this()]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->IsNotNull(); }); } @@ -201,9 +183,6 @@ Result> RangeBitmapFileIndexReader::VisitGreate const Literal& literal) { return std::make_shared( [self = shared_from_this(), literal]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->Gt(literal); }); } @@ -212,9 +191,6 @@ Result> RangeBitmapFileIndexReader::VisitLessTh const Literal& literal) { return std::make_shared( [self = shared_from_this(), literal]() 
-> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->Lt(literal); }); } @@ -223,9 +199,6 @@ Result> RangeBitmapFileIndexReader::VisitGreate const Literal& literal) { return std::make_shared( [self = shared_from_this(), literal]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->Gte(literal); }); } @@ -234,9 +207,6 @@ Result> RangeBitmapFileIndexReader::VisitLessOr const Literal& literal) { return std::make_shared( [self = shared_from_this(), literal]() -> Result { - if (!self->range_bitmap_) { - return RoaringBitmap32(); - } return self->range_bitmap_->Lte(literal); }); } From 1d7bfdeaba54d3fd452a5fb52b3269f52045aa75 Mon Sep 17 00:00:00 2001 From: xiaoheng Date: Fri, 20 Mar 2026 10:38:09 +0800 Subject: [PATCH 3/4] feat: support rangebitmap read and write --- .../common/file_index/rangebitmap/range_bitmap.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp index 38afa603..ed0654bf 100644 --- a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp @@ -74,14 +74,14 @@ Result RangeBitmap::Eq(const Literal& key) { } PAIMON_ASSIGN_OR_RAISE(int32_t min_compare, key.CompareTo(min_)); PAIMON_ASSIGN_OR_RAISE(int32_t max_compare, key.CompareTo(max_)); - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap * bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* bit_slice_ptr, this->GetBitSliceIndex()); if (min_compare == 0 && max_compare == 0) { return bit_slice_ptr->IsNotNull({}); } if (min_compare < 0 || max_compare > 0) { return RoaringBitmap32(); } - PAIMON_ASSIGN_OR_RAISE(Dictionary * dictionary, this->GetDictionary()); + PAIMON_ASSIGN_OR_RAISE(Dictionary* dictionary, this->GetDictionary()); PAIMON_ASSIGN_OR_RAISE(int32_t code, 
dictionary->Find(key)); if (code < 0) { return RoaringBitmap32(); @@ -134,9 +134,9 @@ Result RangeBitmap::Gt(const Literal& key) { if (min_compare < 0) { return IsNotNull(); } - PAIMON_ASSIGN_OR_RAISE(Dictionary * dictionary, this->GetDictionary()); + PAIMON_ASSIGN_OR_RAISE(Dictionary* dictionary, this->GetDictionary()); PAIMON_ASSIGN_OR_RAISE(int32_t code, dictionary->Find(key)); - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap * bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* bit_slice_ptr, this->GetBitSliceIndex()); if (code >= 0) { return bit_slice_ptr->Gt(code); } @@ -190,7 +190,7 @@ Result RangeBitmap::IsNotNull() { if (cardinality_ <= 0) { return RoaringBitmap32(); } - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap * bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* bit_slice_ptr, this->GetBitSliceIndex()); PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, bit_slice_ptr->IsNotNull({})); return result; } From 53d124583a5c25ffb9e97ac8abd3e347141bc097 Mon Sep 17 00:00:00 2001 From: xiaoheng Date: Fri, 20 Mar 2026 11:09:30 +0800 Subject: [PATCH 4/4] feat: support rangebitmap read and write --- .../common/file_index/rangebitmap/range_bitmap.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp index ed0654bf..0c487a1c 100644 --- a/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp +++ b/src/paimon/common/file_index/rangebitmap/range_bitmap.cpp @@ -74,14 +74,14 @@ Result RangeBitmap::Eq(const Literal& key) { } PAIMON_ASSIGN_OR_RAISE(int32_t min_compare, key.CompareTo(min_)); PAIMON_ASSIGN_OR_RAISE(int32_t max_compare, key.CompareTo(max_)); - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(auto bit_slice_ptr, this->GetBitSliceIndex()); if (min_compare == 0 && max_compare == 0) { return 
bit_slice_ptr->IsNotNull({}); } if (min_compare < 0 || max_compare > 0) { return RoaringBitmap32(); } - PAIMON_ASSIGN_OR_RAISE(Dictionary* dictionary, this->GetDictionary()); + PAIMON_ASSIGN_OR_RAISE(auto dictionary, this->GetDictionary()); PAIMON_ASSIGN_OR_RAISE(int32_t code, dictionary->Find(key)); if (code < 0) { return RoaringBitmap32(); @@ -134,9 +134,9 @@ Result RangeBitmap::Gt(const Literal& key) { if (min_compare < 0) { return IsNotNull(); } - PAIMON_ASSIGN_OR_RAISE(Dictionary* dictionary, this->GetDictionary()); + PAIMON_ASSIGN_OR_RAISE(auto dictionary, this->GetDictionary()); PAIMON_ASSIGN_OR_RAISE(int32_t code, dictionary->Find(key)); - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(auto bit_slice_ptr, this->GetBitSliceIndex()); if (code >= 0) { return bit_slice_ptr->Gt(code); } @@ -190,7 +190,7 @@ Result RangeBitmap::IsNotNull() { if (cardinality_ <= 0) { return RoaringBitmap32(); } - PAIMON_ASSIGN_OR_RAISE(BitSliceIndexBitmap* bit_slice_ptr, this->GetBitSliceIndex()); + PAIMON_ASSIGN_OR_RAISE(auto bit_slice_ptr, this->GetBitSliceIndex()); PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 result, bit_slice_ptr->IsNotNull({})); return result; }