From 8fe2181c57c6ecfbe88d6141d4480b1f6c5fe066 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 30 Jun 2026 22:04:13 +0800 Subject: [PATCH] [feature](be) Add file scanner v2 readers ### What problem does this PR solve? Issue Number: close #xxx Related PR: #63893 Problem Summary: Add the file scanner v2 reader stack for external file scans, including native readers for Parquet, CSV/TEXT, JSON, JNI-backed table readers, schema projection, column mapping, predicate handling, reader statistics, page cache support, and related BE/FE integration. This also restores affected Parquet LZO regression cases by adding Doris thirdparty Arrow LZO page decompression support for file scanner v2. The change keeps VDirectInPredicate source-compatible with existing ordinary two-argument construction by defaulting the new HybridSet child-type flag to true. Dictionary-code rewrites can still pass false explicitly, while existing runtime filter tests continue to compile with the old call shape. Review follow-up fixes make RuntimeFilterExpr global-index slot rewriting update the executable _impl tree, document enable_file_scanner_v2 as default-on to match the FE default, and trim generated regression outputs so diff hygiene passes. ### Release note Support file scanner v2 readers for external file scan paths, including LZO-compressed Parquet reads in the new Parquet reader path. ### Check List (For Author) - Test: Manual test - Verified apache-arrow-17.0.0-lzo.patch applies with patch -p1 --dry-run against Arrow 17 column_reader.cc - Ran bash -n thirdparty/build-thirdparty.sh thirdparty/download-thirdparty.sh - Ran build-support/clang-format.sh - Ran git diff --check - Attempted ./run-be-ut.sh --run --filter='RuntimeFilterExprSamplingTest.deep_clone_clones_impl_tree'; local sandboxed run could not complete because the BE UT script required JDK 17 setup first, then needed submodule metadata writes and GitHub access for thirdparty dependencies. Escalated retry was not approved before timeout. - Attempted ./run-be-ut.sh --run --filter='FileScannerV2Test.RewriteSlotRefsToGlobalIndexMatrix'; local sandboxed run could not complete because the BE UT script needed submodule metadata writes and GitHub access for thirdparty dependencies. Escalated retry was not approved before timeout. - Full BE unit tests and external regression tests were not run in this local environment - Behavior changed: Yes. Adds file scanner v2 reader behavior and enables LZO-compressed Parquet reads through the new reader path - Does this need documentation: No --- .gitignore | 1 + be/cmake/thirdparty.cmake | 1 + be/src/core/data_type/data_type_timestamptz.h | 4 + .../data_type_datetimev2_serde.cpp | 143 + .../data_type_datetimev2_serde.h | 2 + .../data_type_datev2_serde.cpp | 24 + .../data_type_serde/data_type_datev2_serde.h | 2 + .../data_type_decimal_serde.cpp | 152 + .../data_type_serde/data_type_decimal_serde.h | 2 + .../data_type_nullable_serde.cpp | 37 +- .../data_type_nullable_serde.h | 2 + .../data_type_number_serde.cpp | 170 + .../data_type_serde/data_type_number_serde.h | 3 + .../core/data_type_serde/data_type_serde.cpp | 48 + be/src/core/data_type_serde/data_type_serde.h | 9 + .../data_type_string_serde.cpp | 42 + .../data_type_serde/data_type_string_serde.h | 3 + .../data_type_serde/data_type_time_serde.cpp | 48 + .../data_type_serde/data_type_time_serde.h | 2 + .../data_type_timestamptz_serde.cpp | 85 + .../data_type_timestamptz_serde.h | 4 + .../data_type_serde/decoded_column_view.h | 105 + be/src/exec/operator/file_scan_operator.cpp | 29 +- be/src/exec/operator/file_scan_operator.h | 2 + be/src/exec/operator/result_sink_operator.h | 2 +- be/src/exec/scan/access_path_parser.cpp | 479 ++ be/src/exec/scan/access_path_parser.h | 41 + be/src/exec/scan/file_scanner.cpp | 28 +- be/src/exec/scan/file_scanner.h | 4 +- be/src/exec/scan/file_scanner_v2.cpp | 835 ++++ be/src/exec/scan/file_scanner_v2.h | 162 + be/src/exec/scan/split_source_connector.h | 29 + .../sink/writer/vhive_partition_writer.cpp | 2 + be/src/exprs/runtime_filter_expr.cpp | 13 +- be/src/exprs/runtime_filter_expr.h | 6 + be/src/exprs/short_circuit_evaluation_expr.h | 35 +- be/src/exprs/vbloom_predicate.h | 7 + be/src/exprs/vcase_expr.h | 11 + be/src/exprs/vcast_expr.h | 12 + be/src/exprs/vcolumn_ref.h | 13 + be/src/exprs/vcompound_pred.h | 5 + be/src/exprs/vcondition_expr.h | 15 + be/src/exprs/vdirect_in_predicate.h | 9 +- be/src/exprs/vectorized_fn_call.cpp | 4 +- be/src/exprs/vectorized_fn_call.h | 6 + be/src/exprs/vexpr.cpp | 59 +- be/src/exprs/vexpr.h | 19 +- be/src/exprs/vin_predicate.h | 9 + be/src/exprs/vliteral.cpp | 6 - be/src/exprs/vliteral.h | 23 +- be/src/exprs/vslot_ref.cpp | 41 +- be/src/exprs/vslot_ref.h | 15 +- be/src/exprs/vtopn_pred.h | 5 + be/src/format/CMakeLists.txt | 3 + be/src/format/csv/csv_reader.cpp | 4 +- be/src/format/generic_reader.h | 12 +- be/src/format/json/new_json_reader.cpp | 4 +- be/src/format/native/native_reader.cpp | 4 +- be/src/format/orc/vorc_reader.cpp | 4 +- be/src/format/parquet/vparquet_reader.cpp | 4 +- .../format/table/deletion_vector_reader.cpp | 19 +- be/src/format/table/deletion_vector_reader.h | 35 +- be/src/format/table/iceberg_reader_mixin.h | 3 - be/src/format_v2/column_data.h | 410 ++ be/src/format_v2/column_mapper.cpp | 2029 ++++++++ be/src/format_v2/column_mapper.h | 294 ++ be/src/format_v2/column_mapper_nested.cpp | 1050 +++++ be/src/format_v2/column_mapper_nested.h | 105 + .../format_v2/delimited_text/csv_reader.cpp | 295 ++ be/src/format_v2/delimited_text/csv_reader.h | 73 + .../delimited_text/delimited_text_reader.cpp | 644 +++ .../delimited_text/delimited_text_reader.h | 176 + .../format_v2/delimited_text/text_reader.cpp | 164 + be/src/format_v2/delimited_text/text_reader.h | 62 + be/src/format_v2/expr/cast.cpp | 131 + be/src/format_v2/expr/cast.h | 68 + be/src/format_v2/expr/delete_predicate.cpp | 122 + be/src/format_v2/expr/delete_predicate.h | 60 + .../expr/equality_delete_predicate.cpp | 159 + .../expr/equality_delete_predicate.h | 71 + be/src/format_v2/file_reader.cpp | 209 + be/src/format_v2/file_reader.h | 400 ++ be/src/format_v2/jni/hudi_jni_reader.cpp | 167 + be/src/format_v2/jni/hudi_jni_reader.h | 43 + .../jni/iceberg_sys_table_reader.cpp | 76 + .../format_v2/jni/iceberg_sys_table_reader.h | 40 + be/src/format_v2/jni/jdbc_reader.cpp | 187 + be/src/format_v2/jni/jdbc_reader.h | 56 + be/src/format_v2/jni/jni_table_reader.cpp | 386 ++ be/src/format_v2/jni/jni_table_reader.h | 117 + .../format_v2/jni/max_compute_jni_reader.cpp | 149 + be/src/format_v2/jni/max_compute_jni_reader.h | 51 + be/src/format_v2/jni/paimon_jni_reader.cpp | 93 + be/src/format_v2/jni/paimon_jni_reader.h | 40 + .../jni/trino_connector_jni_reader.cpp | 141 + .../jni/trino_connector_jni_reader.h | 44 + be/src/format_v2/json/json_reader.cpp | 1123 +++++ be/src/format_v2/json/json_reader.h | 179 + be/src/format_v2/materialized_reader_util.cpp | 89 + be/src/format_v2/materialized_reader_util.h | 63 + be/src/format_v2/native/native_reader.cpp | 311 ++ be/src/format_v2/native/native_reader.h | 70 + .../parquet/parquet_column_schema.cpp | 492 ++ .../format_v2/parquet/parquet_column_schema.h | 80 + .../parquet/parquet_file_context.cpp | 442 ++ .../format_v2/parquet/parquet_file_context.h | 99 + be/src/format_v2/parquet/parquet_profile.cpp | 191 + be/src/format_v2/parquet/parquet_profile.h | 140 + be/src/format_v2/parquet/parquet_reader.cpp | 674 +++ be/src/format_v2/parquet/parquet_reader.h | 92 + be/src/format_v2/parquet/parquet_scan.cpp | 648 +++ be/src/format_v2/parquet/parquet_scan.h | 182 + .../format_v2/parquet/parquet_statistics.cpp | 1303 ++++++ be/src/format_v2/parquet/parquet_statistics.h | 109 + be/src/format_v2/parquet/parquet_type.cpp | 358 ++ be/src/format_v2/parquet/parquet_type.h | 82 + .../parquet/reader/column_reader.cpp | 625 +++ .../format_v2/parquet/reader/column_reader.h | 200 + .../reader/global_rowid_column_reader.cpp | 84 + .../reader/global_rowid_column_reader.h | 47 + .../parquet/reader/list_column_reader.cpp | 203 + .../parquet/reader/list_column_reader.h | 52 + .../parquet/reader/map_column_reader.cpp | 238 + .../parquet/reader/map_column_reader.h | 56 + .../reader/nested_column_materializer.cpp | 70 + .../reader/nested_column_materializer.h | 45 + .../parquet/reader/parquet_leaf_reader.cpp | 728 +++ .../parquet/reader/parquet_leaf_reader.h | 168 + .../reader/row_position_column_reader.cpp | 76 + .../reader/row_position_column_reader.h | 43 + .../parquet/reader/scalar_column_reader.cpp | 315 ++ .../parquet/reader/scalar_column_reader.h | 92 + .../parquet/reader/struct_column_reader.cpp | 258 + .../parquet/reader/struct_column_reader.h | 61 + be/src/format_v2/parquet/selection_vector.h | 163 + be/src/format_v2/schema_projection.cpp | 147 + be/src/format_v2/schema_projection.h | 57 + be/src/format_v2/table/hive_reader.cpp | 150 + be/src/format_v2/table/hive_reader.h | 41 + be/src/format_v2/table/hudi_reader.cpp | 163 + be/src/format_v2/table/hudi_reader.h | 78 + be/src/format_v2/table/iceberg_reader.cpp | 797 ++++ be/src/format_v2/table/iceberg_reader.h | 175 + be/src/format_v2/table/paimon_reader.cpp | 194 + be/src/format_v2/table/paimon_reader.h | 84 + .../format_v2/table/remote_doris_reader.cpp | 365 ++ be/src/format_v2/table/remote_doris_reader.h | 104 + .../format_v2/table/schema_history_util.cpp | 150 + be/src/format_v2/table/schema_history_util.h | 43 + be/src/format_v2/table_reader.cpp | 847 ++++ be/src/format_v2/table_reader.h | 1565 +++++++ be/src/io/file_factory.cpp | 13 +- be/src/io/file_factory.h | 5 +- be/src/io/io_common.h | 4 + be/src/storage/segment/condition_cache.h | 18 +- be/src/util/jni-util.h | 8 + be/test/CMakeLists.txt | 1 + .../data_type_serde_decoded_values_test.cpp | 1852 ++++++++ .../data_type_serde_pb_test.cpp | 14 +- .../runtime_filter_expr_sampling_test.cpp | 46 + be/test/exec/scan/access_path_parser_test.cpp | 371 ++ be/test/exec/scan/file_scanner_v2_test.cpp | 347 ++ .../scan/vfile_scanner_exception_test.cpp | 115 +- be/test/format_v2/column_mapper_test.cpp | 4140 +++++++++++++++++ .../delimited_text/csv_reader_test.cpp | 1070 +++++ .../delimited_text/text_reader_test.cpp | 965 ++++ be/test/format_v2/expr/cast_test.cpp | 172 + .../format_v2/expr/delete_predicate_test.cpp | 168 + .../expr/equality_delete_predicate_test.cpp | 181 + be/test/format_v2/json/json_reader_test.cpp | 608 +++ .../format_v2/native/native_reader_test.cpp | 419 ++ .../parquet/parquet_column_reader_test.cpp | 3620 ++++++++++++++ .../parquet/parquet_leaf_reader_test.cpp | 506 ++ .../parquet/parquet_page_cache_range_test.cpp | 117 + .../parquet/parquet_reader_control_test.cpp | 1034 ++++ .../format_v2/parquet/parquet_reader_test.cpp | 2274 +++++++++ .../format_v2/parquet/parquet_scan_test.cpp | 804 ++++ .../format_v2/parquet/parquet_schema_test.cpp | 527 +++ .../parquet/parquet_serde_reader_test.cpp | 459 ++ .../parquet/parquet_statistics_test.cpp | 460 ++ .../format_v2/parquet/parquet_type_test.cpp | 494 ++ be/test/format_v2/table/hive_reader_test.cpp | 151 + be/test/format_v2/table/hudi_reader_test.cpp | 182 + .../format_v2/table/iceberg_reader_test.cpp | 1852 ++++++++ .../format_v2/table/paimon_reader_test.cpp | 539 +++ .../table/remote_doris_reader_test.cpp | 470 ++ .../format_v2/table_reader_request_test.cpp | 96 + be/test/format_v2/table_reader_test.cpp | 3826 +++++++++++++++ docs/doris-iceberg-parquet-api-design.md | 511 ++ ...ew-parquet-reader-column-index-refactor.md | 404 ++ .../new-parquet-reader-ut-improvement-plan.md | 325 ++ docs/parquet-list-map-compat-design.md | 664 +++ .../apache/doris/paimon/PaimonJniScanner.java | 4 + .../datasource/hive/HMSExternalTable.java | 3 + .../paimon/source/PaimonScanNode.java | 4 + .../org/apache/doris/qe/SessionVariable.java | 17 +- .../ParquetFileFormatPropertiesTest.java | 2 + gensrc/thrift/Exprs.thrift | 4 + gensrc/thrift/Opcodes.thrift | 2 + gensrc/thrift/PaloInternalService.thrift | 1 + gensrc/thrift/PlanNodes.thrift | 8 + .../export_p0/export/test_show_export.out | 298 +- .../test_outfile_parquet_complex_type.out | 12 + .../data/export_p0/test_export_parquet.out | 198 +- .../parquet/test_hive_read_parquet.out | 24 +- ...> test_hive_read_parquet_complex_type.out} | 0 .../hive/ddl/test_hive_ctas.out | 200 - .../hive/test_complex_types.out | 48 - .../hive/test_external_catalog_hive.out | 124 - .../test_external_catalog_hive_partition.out | 120 - .../hive/test_hive_compress_type.out | 595 ++- .../hive/test_hive_get_schema_from_table.out | 651 --- .../hive/test_hive_openx_json.out | 1 + .../hive/test_hive_schema_evolution.out | 36 - .../hive/write/test_hive_write_insert.out | 232 - .../test_iceberg_export_timestamp_tz.out | 48 +- .../test_paimon_catalog_timestamp_tz.out | 8 +- .../tvf/test_hdfs_parquet_group0.out | Bin 23955 -> 26419 bytes .../tvf/test_hdfs_parquet_group2.out | 203 +- .../tvf/test_hdfs_parquet_group3.out | Bin 11387 -> 11347 bytes .../tvf/test_hdfs_parquet_group4.out | Bin 106854 -> 106812 bytes .../tvf/test_hdfs_parquet_group5.out | Bin 613319 -> 613345 bytes .../tvf/test_hdfs_parquet_group6.out | 30 +- .../test_outfile_parquet_complex_type.groovy | 11 + .../parquet/test_hive_read_parquet.groovy | 3 +- ...est_hive_read_parquet_complex_type.groovy} | 3 +- .../hive/test_hive_compress_type.groovy | 26 +- .../hive/test_hive_date_timezone.groovy | 1 - .../hive/test_parquet_lazy_mat_profile.groovy | 2 + .../test_iceberg_optimize_count.groovy | 6 +- .../test_remote_doris_agg_table_select.groovy | 4 +- ...st_remote_doris_unique_table_select.groovy | 4 +- .../test_remote_doris_variant_select.groovy | 2 +- .../tvf/test_hdfs_parquet_group0.groovy | 8 +- .../tvf/test_hdfs_parquet_group2.groovy | 5 +- .../tvf/test_hdfs_parquet_group4.groovy | 4 +- .../tvf/test_hdfs_parquet_group5.groovy | 4 +- .../tvf/test_hdfs_parquet_group6.groovy | 14 +- thirdparty/build-thirdparty.sh | 5 +- thirdparty/download-thirdparty.sh | 3 + .../patches/apache-arrow-17.0.0-lzo.patch | 84 + thirdparty/vars.sh | 8 +- 242 files changed, 56859 insertions(+), 2273 deletions(-) create mode 100644 be/src/core/data_type_serde/decoded_column_view.h create mode 100644 be/src/exec/scan/access_path_parser.cpp create mode 100644 be/src/exec/scan/access_path_parser.h create mode 100644 be/src/exec/scan/file_scanner_v2.cpp create mode 100644 be/src/exec/scan/file_scanner_v2.h create mode 100644 be/src/format_v2/column_data.h create mode 100644 be/src/format_v2/column_mapper.cpp create mode 100644 be/src/format_v2/column_mapper.h create mode 100644 be/src/format_v2/column_mapper_nested.cpp create mode 100644 be/src/format_v2/column_mapper_nested.h create mode 100644 be/src/format_v2/delimited_text/csv_reader.cpp create mode 100644 be/src/format_v2/delimited_text/csv_reader.h create mode 100644 be/src/format_v2/delimited_text/delimited_text_reader.cpp create mode 100644 be/src/format_v2/delimited_text/delimited_text_reader.h create mode 100644 be/src/format_v2/delimited_text/text_reader.cpp create mode 100644 be/src/format_v2/delimited_text/text_reader.h create mode 100644 be/src/format_v2/expr/cast.cpp create mode 100644 be/src/format_v2/expr/cast.h create mode 100644 be/src/format_v2/expr/delete_predicate.cpp create mode 100644 be/src/format_v2/expr/delete_predicate.h create mode 100644 be/src/format_v2/expr/equality_delete_predicate.cpp create mode 100644 be/src/format_v2/expr/equality_delete_predicate.h create mode 100644 be/src/format_v2/file_reader.cpp create mode 100644 be/src/format_v2/file_reader.h create mode 100644 be/src/format_v2/jni/hudi_jni_reader.cpp create mode 100644 be/src/format_v2/jni/hudi_jni_reader.h create mode 100644 be/src/format_v2/jni/iceberg_sys_table_reader.cpp create mode 100644 be/src/format_v2/jni/iceberg_sys_table_reader.h create mode 100644 be/src/format_v2/jni/jdbc_reader.cpp create mode 100644 be/src/format_v2/jni/jdbc_reader.h create mode 100644 be/src/format_v2/jni/jni_table_reader.cpp create mode 100644 be/src/format_v2/jni/jni_table_reader.h create mode 100644 be/src/format_v2/jni/max_compute_jni_reader.cpp create mode 100644 be/src/format_v2/jni/max_compute_jni_reader.h create mode 100644 be/src/format_v2/jni/paimon_jni_reader.cpp create mode 100644 be/src/format_v2/jni/paimon_jni_reader.h create mode 100644 be/src/format_v2/jni/trino_connector_jni_reader.cpp create mode 100644 be/src/format_v2/jni/trino_connector_jni_reader.h create mode 100644 be/src/format_v2/json/json_reader.cpp create mode 100644 be/src/format_v2/json/json_reader.h create mode 100644 be/src/format_v2/materialized_reader_util.cpp create mode 100644 be/src/format_v2/materialized_reader_util.h create mode 100644 be/src/format_v2/native/native_reader.cpp create mode 100644 be/src/format_v2/native/native_reader.h create mode 100644 be/src/format_v2/parquet/parquet_column_schema.cpp create mode 100644 be/src/format_v2/parquet/parquet_column_schema.h create mode 100644 be/src/format_v2/parquet/parquet_file_context.cpp create mode 100644 be/src/format_v2/parquet/parquet_file_context.h create mode 100644 be/src/format_v2/parquet/parquet_profile.cpp create mode 100644 be/src/format_v2/parquet/parquet_profile.h create mode 100644 be/src/format_v2/parquet/parquet_reader.cpp create mode 100644 be/src/format_v2/parquet/parquet_reader.h create mode 100644 be/src/format_v2/parquet/parquet_scan.cpp create mode 100644 be/src/format_v2/parquet/parquet_scan.h create mode 100644 be/src/format_v2/parquet/parquet_statistics.cpp create mode 100644 be/src/format_v2/parquet/parquet_statistics.h create mode 100644 be/src/format_v2/parquet/parquet_type.cpp create mode 100644 be/src/format_v2/parquet/parquet_type.h create mode 100644 be/src/format_v2/parquet/reader/column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/column_reader.h create mode 100644 be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/global_rowid_column_reader.h create mode 100644 be/src/format_v2/parquet/reader/list_column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/list_column_reader.h create mode 100644 be/src/format_v2/parquet/reader/map_column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/map_column_reader.h create mode 100644 be/src/format_v2/parquet/reader/nested_column_materializer.cpp create mode 100644 be/src/format_v2/parquet/reader/nested_column_materializer.h create mode 100644 be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/parquet_leaf_reader.h create mode 100644 be/src/format_v2/parquet/reader/row_position_column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/row_position_column_reader.h create mode 100644 be/src/format_v2/parquet/reader/scalar_column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/scalar_column_reader.h create mode 100644 be/src/format_v2/parquet/reader/struct_column_reader.cpp create mode 100644 be/src/format_v2/parquet/reader/struct_column_reader.h create mode 100644 be/src/format_v2/parquet/selection_vector.h create mode 100644 be/src/format_v2/schema_projection.cpp create mode 100644 be/src/format_v2/schema_projection.h create mode 100644 be/src/format_v2/table/hive_reader.cpp create mode 100644 be/src/format_v2/table/hive_reader.h create mode 100644 be/src/format_v2/table/hudi_reader.cpp create mode 100644 be/src/format_v2/table/hudi_reader.h create mode 100644 be/src/format_v2/table/iceberg_reader.cpp create mode 100644 be/src/format_v2/table/iceberg_reader.h create mode 100644 be/src/format_v2/table/paimon_reader.cpp create mode 100644 be/src/format_v2/table/paimon_reader.h create mode 100644 be/src/format_v2/table/remote_doris_reader.cpp create mode 100644 be/src/format_v2/table/remote_doris_reader.h create mode 100644 be/src/format_v2/table/schema_history_util.cpp create mode 100644 be/src/format_v2/table/schema_history_util.h create mode 100644 be/src/format_v2/table_reader.cpp create mode 100644 be/src/format_v2/table_reader.h create mode 100644 be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp create mode 100644 be/test/exec/scan/access_path_parser_test.cpp create mode 100644 be/test/exec/scan/file_scanner_v2_test.cpp create mode 100644 be/test/format_v2/column_mapper_test.cpp create mode 100644 be/test/format_v2/delimited_text/csv_reader_test.cpp create mode 100644 be/test/format_v2/delimited_text/text_reader_test.cpp create mode 100644 be/test/format_v2/expr/cast_test.cpp create mode 100644 be/test/format_v2/expr/delete_predicate_test.cpp create mode 100644 be/test/format_v2/expr/equality_delete_predicate_test.cpp create mode 100644 be/test/format_v2/json/json_reader_test.cpp create mode 100644 be/test/format_v2/native/native_reader_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_column_reader_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_leaf_reader_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_page_cache_range_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_reader_control_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_reader_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_scan_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_schema_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_serde_reader_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_statistics_test.cpp create mode 100644 be/test/format_v2/parquet/parquet_type_test.cpp create mode 100644 be/test/format_v2/table/hive_reader_test.cpp create mode 100644 be/test/format_v2/table/hudi_reader_test.cpp create mode 100644 be/test/format_v2/table/iceberg_reader_test.cpp create mode 100644 be/test/format_v2/table/paimon_reader_test.cpp create mode 100644 be/test/format_v2/table/remote_doris_reader_test.cpp create mode 100644 be/test/format_v2/table_reader_request_test.cpp create mode 100644 be/test/format_v2/table_reader_test.cpp create mode 100644 docs/doris-iceberg-parquet-api-design.md create mode 100644 docs/new-parquet-reader-column-index-refactor.md create mode 100644 docs/new-parquet-reader-ut-improvement-plan.md create mode 100644 docs/parquet-list-map-compat-design.md rename regression-test/data/external_table_p0/export/hive_read/parquet/{test_hive_read_parquet_comlex_type.out => test_hive_read_parquet_complex_type.out} (100%) rename regression-test/suites/external_table_p0/export/hive_read/parquet/{test_hive_read_parquet_comlex_type.groovy => test_hive_read_parquet_complex_type.groovy} (99%) create mode 100644 thirdparty/patches/apache-arrow-17.0.0-lzo.patch diff --git a/.gitignore b/.gitignore index 7a61c598c99f75..d3976f46132e8c 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,4 @@ compile_commands.json .github .worktrees/ +.worktree_initialized diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake index 1dba907307deca..f8598d785dfed5 100644 --- a/be/cmake/thirdparty.cmake +++ b/be/cmake/thirdparty.cmake @@ -66,6 +66,7 @@ add_thirdparty(gmock) add_thirdparty(snappy) add_thirdparty(curl) add_thirdparty(lz4) +add_thirdparty(lzo2) add_thirdparty(thrift) add_thirdparty(thriftnb) add_thirdparty(crc32c) diff --git a/be/src/core/data_type/data_type_timestamptz.h b/be/src/core/data_type/data_type_timestamptz.h index 4a3fba0616cc45..b386402cb49696 100644 --- a/be/src/core/data_type/data_type_timestamptz.h +++ b/be/src/core/data_type/data_type_timestamptz.h @@ -56,6 +56,10 @@ class DataTypeTimeStampTz final : public DataTypeNumberBaseset_scale(_scale); + } + void to_pb_column_meta(PColumnMeta* col_meta) const override { DataTypeNumberBase::to_pb_column_meta(col_meta); col_meta->mutable_decimal_param()->set_scale(_scale); diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp index ca84996ea45306..0eb5e4d44a39a1 100644 --- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp +++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp @@ -28,6 +28,7 @@ #include "core/data_type/data_type_decimal.h" #include "core/data_type/data_type_number.h" #include "core/data_type/primitive_type.h" +#include "core/data_type_serde/decoded_column_view.h" #include "core/types.h" #include "core/value/vdatetime_value.h" #include "exprs/function/cast/cast_to_datetimev2_impl.hpp" @@ -43,6 +44,95 @@ enum { namespace doris { static const int64_t micro_to_nano_second = 1000; +namespace { + +#pragma pack(1) +struct DecodedInt96Timestamp { + int64_t nanos_of_day; + int32_t julian_day; + + int64_t to_timestamp_micros() const { + static constexpr int32_t JULIAN_EPOCH_OFFSET_DAYS = 2440588; + static constexpr int64_t MICROS_IN_DAY = 86400000000; + static constexpr int64_t NANOS_PER_MICROSECOND = 1000; + return (julian_day - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + + nanos_of_day / NANOS_PER_MICROSECOND; + } +}; +#pragma pack() +static_assert(sizeof(DecodedInt96Timestamp) == 12); + +Status append_datetimev2_from_epoch_micros(ColumnDateTimeV2::Container& data, + int64_t timestamp_micros) { + static constexpr int64_t MICROS_PER_SECOND = 1000000; + static constexpr int64_t MICROS_PER_MINUTE = MICROS_PER_SECOND * 60; + static constexpr int64_t MICROS_PER_HOUR = MICROS_PER_MINUTE * 60; + static constexpr int64_t MICROS_PER_DAY = MICROS_PER_HOUR * 24; + static const int64_t EPOCH_DAYNR = calc_daynr(1970, 1, 1); + + int64_t days_since_epoch = timestamp_micros / MICROS_PER_DAY; + int64_t micros_of_day = timestamp_micros % MICROS_PER_DAY; + if (micros_of_day < 0) { + micros_of_day += MICROS_PER_DAY; + --days_since_epoch; + } + + const int64_t daynr = EPOCH_DAYNR + days_since_epoch; + if (daynr <= 0) { + return Status::DataQualityError( + "Decoded DATETIMEV2 timestamp is out of range: micros={}, daynr={}", + timestamp_micros, daynr); + } + + DateV2Value datetime_value; + if (!datetime_value.get_date_from_daynr(static_cast(daynr))) { + return Status::DataQualityError( + "Decoded DATETIMEV2 timestamp is out of range: micros={}, daynr={}", + timestamp_micros, daynr); + } + + const auto hour = static_cast(micros_of_day / MICROS_PER_HOUR); + micros_of_day %= MICROS_PER_HOUR; + const auto minute = static_cast(micros_of_day / MICROS_PER_MINUTE); + micros_of_day %= MICROS_PER_MINUTE; + const auto second = static_cast(micros_of_day / MICROS_PER_SECOND); + const auto microsecond = static_cast(micros_of_day % MICROS_PER_SECOND); + datetime_value.unchecked_set_time(datetime_value.year(), datetime_value.month(), + datetime_value.day(), hour, minute, second, microsecond); + data.push_back(datetime_value); + return Status::OK(); +} + +void append_datetimev2_from_utc_epoch_micros(ColumnDateTimeV2::Container& data, + int64_t timestamp_micros, + const cctz::time_zone& timezone) { + static constexpr int64_t MICROS_PER_SECOND = 1000000; + + int64_t epoch_seconds = timestamp_micros / MICROS_PER_SECOND; + int64_t micros_of_second = timestamp_micros % MICROS_PER_SECOND; + if (micros_of_second < 0) { + micros_of_second += MICROS_PER_SECOND; + --epoch_seconds; + } + + DateV2Value datetime_value; + datetime_value.from_unixtime(epoch_seconds, timezone); + datetime_value.set_microsecond(static_cast(micros_of_second)); + data.push_back(datetime_value); +} + +int64_t decoded_timestamp_micros(const DecodedColumnView& view, int64_t value) { + if (view.time_unit == DecodedTimeUnit::MILLIS) { + return value * 1000; + } + if (view.time_unit == DecodedTimeUnit::NANOS) { + return value / 1000; + } + return value; +} + +} // namespace + // NOLINTBEGIN(readability-function-size) // NOLINTBEGIN(readability-function-cognitive-complexity) Status DataTypeDateTimeV2SerDe::from_string_batch(const ColumnString& col_str, @@ -451,6 +541,59 @@ Status DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column, return Status::OK(); } +Status DataTypeDateTimeV2SerDe::read_column_from_decoded_values( + IColumn& column, const DecodedColumnView& view) const { + if (view.value_kind != DecodedValueKind::INT64 && view.value_kind != DecodedValueKind::INT96) { + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("DATETIMEV2 decoded reader expects INT64 or INT96 source")); + } + if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded value buffer is null for {}", column.get_name()); + } + auto& data = assert_cast(column).get_data(); + const auto old_size = data.size(); + if (view.value_kind == DecodedValueKind::INT96) { + const auto* values = reinterpret_cast(view.values); + static const auto utc_timezone = cctz::utc_time_zone(); + const auto& timezone = view.timezone == nullptr ? utc_timezone : *view.timezone; + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(DateV2Value()); + continue; + } + append_datetimev2_from_utc_epoch_micros(data, values[row].to_timestamp_micros(), + timezone); + } + return Status::OK(); + } + + const auto* values = reinterpret_cast(view.values); + static const auto utc_timezone = cctz::utc_time_zone(); + const auto& timezone = view.timezone == nullptr ? utc_timezone : *view.timezone; + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(DateV2Value()); + continue; + } + const int64_t timestamp_micros = decoded_timestamp_micros(view, values[row]); + if (view.timestamp_is_adjusted_to_utc) { + append_datetimev2_from_utc_epoch_micros(data, timestamp_micros, timezone); + } else { + auto st = append_datetimev2_from_epoch_micros(data, timestamp_micros); + if (!st.ok()) { + if (decoded_column_view_can_null_on_conversion_failure(view)) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + continue; + } + data.resize(old_size); + return st; + } + } + } + return Status::OK(); +} + Status DataTypeDateTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result, int64_t row_idx, bool col_const, diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.h b/be/src/core/data_type_serde/data_type_datetimev2_serde.h index 0389432a621730..34d0373eba1c34 100644 --- a/be/src/core/data_type_serde/data_type_datetimev2_serde.h +++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.h @@ -88,6 +88,8 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe(column).get_data(); + const auto* values = reinterpret_cast(view.values); + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(DateV2Value()); + continue; + } + DateV2Value date_v2; + date_v2.get_date_from_daynr(values[row] + date_threshold); + data.push_back(date_v2); + } + return Status::OK(); +} + Status DataTypeDateV2SerDe::write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result, int64_t row_idx, bool col_const, diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.h b/be/src/core/data_type_serde/data_type_datev2_serde.h index 0375f9be4b4b23..ff985d61345d5a 100644 --- a/be/src/core/data_type_serde/data_type_datev2_serde.h +++ b/be/src/core/data_type_serde/data_type_datev2_serde.h @@ -86,6 +86,8 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe +NativeType decode_big_endian_signed_integer(const uint8_t* data, int length) { + if constexpr (std::is_same_v) { + NativeType value = data != nullptr && length > 0 && (data[0] & 0x80) != 0 ? NativeType(-1) + : NativeType(0); + for (int i = 0; i < length; ++i) { + value = (value << 8) + NativeType(data[i]); + } + return value; + } else { + using UnsignedNativeType = + std::conditional_t, unsigned __int128, + std::make_unsigned_t>; + UnsignedNativeType value = data != nullptr && length > 0 && (data[0] & 0x80) != 0 + ? static_cast(-1) + : 0; + for (int i = 0; i < length; ++i) { + value = static_cast((value << 8) | data[i]); + } + return static_cast(value); + } +} + +template +bool decoded_decimal_value_fits(const typename PrimitiveTypeTraits::CppType::NativeType& value, + UInt32 precision) { + return value >= min_decimal_value(precision).value && + value <= max_decimal_value(precision).value; +} + +template +bool decoded_decimal_int_value_fits(Int128 value, UInt32 precision) { + using NativeType = typename PrimitiveTypeTraits::CppType::NativeType; + if constexpr (std::is_same_v) { + const auto wide_value = wide::Int256(value); + return decoded_decimal_value_fits(wide_value, precision); + } else { + return value >= static_cast(min_decimal_value(precision).value) && + value <= static_cast(max_decimal_value(precision).value); + } +} + +template +Status read_decimal_decoded_value(const DecodedColumnView& view, UInt32 precision, int64_t row, + typename PrimitiveTypeTraits::CppType* result) { + using FieldType = typename PrimitiveTypeTraits::CppType; + using NativeType = typename FieldType::NativeType; + NativeType native_value; + if (view.value_kind == DecodedValueKind::INT32) { + const auto* values = reinterpret_cast(view.values); + const auto value = static_cast(values[row]); + if (!decoded_decimal_int_value_fits(value, precision)) { + return Status::DataQualityError("Decoded decimal value is out of range"); + } + native_value = NativeType(value); + } else if (view.value_kind == DecodedValueKind::INT64) { + const auto* values = reinterpret_cast(view.values); + const auto value = static_cast(values[row]); + if (!decoded_decimal_int_value_fits(value, precision)) { + return Status::DataQualityError("Decoded decimal value is out of range"); + } + native_value = NativeType(value); + } else { + const auto& value = (*view.binary_values)[row]; + const auto length = view.value_kind == DecodedValueKind::FIXED_BINARY + ? view.fixed_length + : cast_set(value.size); + if (length > static_cast(sizeof(NativeType))) { + return Status::DataQualityError("Decoded decimal binary value is too wide: length={}", + length); + } + native_value = decode_big_endian_signed_integer( + reinterpret_cast(value.data), length); + } + if (!decoded_decimal_value_fits(native_value, precision)) { + return Status::DataQualityError("Decoded decimal value is out of range"); + } + *result = FieldType {native_value}; + return Status::OK(); +} + +template +Status read_decimal_decoded_values(IColumn& column, const DecodedColumnView& view, + UInt32 precision) { + if (view.value_kind == DecodedValueKind::INT32 || view.value_kind == DecodedValueKind::INT64) { + if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded value buffer is null for {}", column.get_name()); + } + } else if (view.binary_values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded binary values are null for {}", column.get_name()); + } + auto& data = assert_cast&>(column).get_data(); + const auto old_size = data.size(); + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(typename PrimitiveTypeTraits::CppType()); + continue; + } + if (view.value_kind == DecodedValueKind::BINARY || + view.value_kind == DecodedValueKind::FIXED_BINARY) { + const auto& value = (*view.binary_values)[row]; + const auto length = view.value_kind == DecodedValueKind::FIXED_BINARY + ? view.fixed_length + : cast_set(value.size); + if (value.data == nullptr && length > 0) { + if (decoded_column_view_can_null_on_conversion_failure(view)) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + continue; + } + return Status::Corruption("Decoded decimal binary value is null for {} at row {}", + column.get_name(), row); + } + } + typename PrimitiveTypeTraits::CppType value; + auto st = read_decimal_decoded_value(view, precision, row, &value); + if (!st.ok()) { + if (decoded_column_view_can_null_on_conversion_failure(view)) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + continue; + } + data.resize(old_size); + st.prepend(fmt::format( + "Failed to decode decimal value for {} at row {}: ", column.get_name(), row)); + return st; + } + data.push_back(value); + } + return Status::OK(); +} + +} // namespace template Status DataTypeDecimalSerDe::from_string_batch(const ColumnString& str, ColumnNullable& column, @@ -371,6 +505,24 @@ Status DataTypeDecimalSerDe::read_column_from_arrow(IColumn& column, return Status::OK(); } +template +Status DataTypeDecimalSerDe::read_column_from_decoded_values( + IColumn& column, const DecodedColumnView& view) const { + if constexpr (T == TYPE_DECIMAL32 || T == TYPE_DECIMAL64 || T == TYPE_DECIMAL128I || + T == TYPE_DECIMAL256) { + if (view.value_kind == DecodedValueKind::INT32 || + view.value_kind == DecodedValueKind::INT64 || + view.value_kind == DecodedValueKind::BINARY || + view.value_kind == DecodedValueKind::FIXED_BINARY) { + return read_decimal_decoded_values(column, view, precision); + } + } + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("Unsupported decoded values for {} from source kind {}", + get_name(), static_cast(view.value_kind))); +} + template Status DataTypeDecimalSerDe::write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result, diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.h b/be/src/core/data_type_serde/data_type_decimal_serde.h index 0185672e024718..089835a21be955 100644 --- a/be/src/core/data_type_serde/data_type_decimal_serde.h +++ b/be/src/core/data_type_serde/data_type_decimal_serde.h @@ -107,6 +107,8 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { const cctz::time_zone& ctz) const override; Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; + Status read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const override; Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.cpp b/be/src/core/data_type_serde/data_type_nullable_serde.cpp index a93f8d6126c7d5..7c6ce46e1cd960 100644 --- a/be/src/core/data_type_serde/data_type_nullable_serde.cpp +++ b/be/src/core/data_type_serde/data_type_nullable_serde.cpp @@ -22,7 +22,7 @@ #include #include -#include +#include #include "core/assert_cast.h" #include "core/column/column.h" @@ -31,10 +31,12 @@ #include "core/column/column_vector.h" #include "core/data_type_serde/data_type_serde.h" #include "core/data_type_serde/data_type_string_serde.h" +#include "core/data_type_serde/decoded_column_view.h" #include "exprs/function/cast/cast_base.h" #include "format/transformer/vcsv_transformer.h" #include "util/jsonb_document.h" #include "util/jsonb_writer.h" +#include "util/simd/bits.h" namespace doris { class Arena; @@ -350,6 +352,39 @@ Status DataTypeNullableSerDe::read_column_from_arrow(IColumn& column, ctz); } +Status DataTypeNullableSerDe::read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const { + auto& nullable_column = assert_cast(column); + auto& null_map = nullable_column.get_null_map_data(); + const auto old_size = null_map.size(); + auto& nested_column = nullable_column.get_nested_column(); + const auto old_nested_size = nested_column.size(); + null_map.resize(null_map.size() + view.row_count); + if (view.null_map == nullptr) { + // No null value + memset(null_map.data() + old_size, 0, view.row_count); + } else { + // TODO: skip if no null in map + auto* dst = null_map.data() + old_size; + memcpy(dst, view.null_map, view.row_count); + // If there are all null values, we can skip reading nested column and just insert defaults. + if (simd::count_zero_num(reinterpret_cast(view.null_map), view.row_count) == + 0) { + nested_column.insert_many_defaults(view.row_count); + return Status::OK(); + } + } + DecodedColumnView nested_view = view; + nested_view.conversion_failure_null_map = &null_map; + nested_view.conversion_failure_null_map_offset = old_size; + auto st = nested_serde->read_column_from_decoded_values(nested_column, nested_view); + if (!st.ok()) { + null_map.resize(old_size); + nested_column.resize(old_nested_size); + } + return st; +} + bool DataTypeNullableSerDe::write_column_to_mysql_text(const IColumn& column, BufferWritable& bw, int64_t row_idx, const FormatOptions& options) const { diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.h b/be/src/core/data_type_serde/data_type_nullable_serde.h index 6e069444483b87..ee1eab51941ecb 100644 --- a/be/src/core/data_type_serde/data_type_nullable_serde.h +++ b/be/src/core/data_type_serde/data_type_nullable_serde.h @@ -86,6 +86,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe { const cctz::time_zone& ctz) const override; Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; + Status read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const override; Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; diff --git a/be/src/core/data_type_serde/data_type_number_serde.cpp b/be/src/core/data_type_serde/data_type_number_serde.cpp index 2124547c2f89f1..3c99a53b5b07bf 100644 --- a/be/src/core/data_type_serde/data_type_number_serde.cpp +++ b/be/src/core/data_type_serde/data_type_number_serde.cpp @@ -20,6 +20,8 @@ #include #include +#include +#include #include "common/exception.h" #include "common/status.h" @@ -27,6 +29,7 @@ #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" #include "core/data_type_serde/data_type_serde.h" +#include "core/data_type_serde/decoded_column_view.h" #include "core/packed_int128.h" #include "core/types.h" #include "core/value/timestamptz_value.h" @@ -43,6 +46,137 @@ #include "util/unaligned.h" namespace doris { +namespace { + +template +const NativeType* decoded_values_as(const DecodedColumnView& view) { + return reinterpret_cast(view.values); +} + +template +bool decoded_number_value_fits(SourceType value) { + if constexpr (std::is_floating_point_v) { + return true; + } else if constexpr (std::is_same_v) { + return value == SourceType(0) || value == SourceType(1); + } else if constexpr (std::is_signed_v) { + const auto int128_value = static_cast(value); + return int128_value >= static_cast(std::numeric_limits::lowest()) && + int128_value <= static_cast(std::numeric_limits::max()); + } else { + const auto uint128_value = static_cast(value); + if constexpr (std::is_signed_v) { + return uint128_value <= + static_cast(std::numeric_limits::max()); + } else { + return uint128_value <= + static_cast(std::numeric_limits::max()); + } + } +} + +template +Status read_number_decoded_values(IColumn& column, const DecodedColumnView& view) { + if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded value buffer is null for {}", column.get_name()); + } + auto& data = + assert_cast::ColumnType&>(column).get_data(); + const auto old_size = data.size(); + const auto* values = decoded_values_as(view); + for (int64_t row = 0; row < view.row_count; ++row) { + using DorisCppType = typename PrimitiveTypeTraits::CppType; + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(DorisCppType()); + continue; + } + if (!decoded_number_value_fits(values[row])) { + if (decoded_column_view_can_null_on_conversion_failure(view)) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + continue; + } + data.resize(old_size); + return Status::DataQualityError("Decoded value is out of range for {} at row {}", + column.get_name(), row); + } + data.push_back(static_cast(values[row])); + } + return Status::OK(); +} + +template +Status read_logical_integer_decoded_values_as(IColumn& column, const DecodedColumnView& view) { + if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded value buffer is null for {}", column.get_name()); + } + auto& data = + assert_cast::ColumnType&>(column).get_data(); + const auto old_size = data.size(); + const auto* values = decoded_values_as(view); + for (int64_t row = 0; row < view.row_count; ++row) { + using DorisCppType = typename PrimitiveTypeTraits::CppType; + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(DorisCppType()); + continue; + } + const auto logical_value = static_cast(values[row]); + if (!decoded_number_value_fits(logical_value)) { + if (decoded_column_view_can_null_on_conversion_failure(view)) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + continue; + } + data.resize(old_size); + return Status::DataQualityError( + "Decoded logical integer value is out of range for {} at row {}", + column.get_name(), row); + } + data.push_back(static_cast(logical_value)); + } + return Status::OK(); +} + +template +Status read_integer_decoded_values(IColumn& column, const DecodedColumnView& view) { + if (view.logical_integer_bit_width <= 0) { + return read_number_decoded_values(column, view); + } + + if (view.logical_integer_is_signed) { + switch (view.logical_integer_bit_width) { + case 8: + return read_logical_integer_decoded_values_as(column, + view); + case 16: + return read_logical_integer_decoded_values_as(column, + view); + case 32: + return read_logical_integer_decoded_values_as(column, + view); + case 64: + return read_logical_integer_decoded_values_as(column, + view); + default: + return Status::NotSupported("Unsupported decoded logical integer bit width {} for {}", + view.logical_integer_bit_width, column.get_name()); + } + } + + switch (view.logical_integer_bit_width) { + case 8: + return read_logical_integer_decoded_values_as(column, view); + case 16: + return read_logical_integer_decoded_values_as(column, view); + case 32: + return read_logical_integer_decoded_values_as(column, view); + case 64: + return read_logical_integer_decoded_values_as(column, view); + default: + return Status::NotSupported("Unsupported decoded logical integer bit width {} for {}", + view.logical_integer_bit_width, column.get_name()); + } +} + +} // namespace // Type map的基本结构 template struct TypeMap { @@ -157,6 +291,42 @@ Status DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, cons return Status::OK(); } +template +Status DataTypeNumberSerDe::read_column_from_decoded_values( + IColumn& column, const DecodedColumnView& view) const { + if constexpr (T == TYPE_BOOLEAN) { + if (view.value_kind == DecodedValueKind::BOOL) { + return read_number_decoded_values(column, view); + } + } else if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || + T == TYPE_BIGINT || T == TYPE_LARGEINT) { + if (view.value_kind == DecodedValueKind::INT32) { + return read_integer_decoded_values(column, view); + } + if (view.value_kind == DecodedValueKind::UINT32) { + return read_integer_decoded_values(column, view); + } + if (view.value_kind == DecodedValueKind::INT64) { + return read_integer_decoded_values(column, view); + } + if (view.value_kind == DecodedValueKind::UINT64) { + return read_integer_decoded_values(column, view); + } + } else if constexpr (T == TYPE_FLOAT) { + if (view.value_kind == DecodedValueKind::FLOAT) { + return read_number_decoded_values(column, view); + } + } else if constexpr (T == TYPE_DOUBLE) { + if (view.value_kind == DecodedValueKind::DOUBLE) { + return read_number_decoded_values(column, view); + } + } + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("Unsupported decoded values for {} from source kind {}", + get_name(), static_cast(view.value_kind))); +} + template Status DataTypeNumberSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { diff --git a/be/src/core/data_type_serde/data_type_number_serde.h b/be/src/core/data_type_serde/data_type_number_serde.h index b57f9f9d21298d..0e0a3acfc1aed7 100644 --- a/be/src/core/data_type_serde/data_type_number_serde.h +++ b/be/src/core/data_type_serde/data_type_number_serde.h @@ -117,6 +117,9 @@ class DataTypeNumberSerDe : public DataTypeSerDe { Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; + Status read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const override; + Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; diff --git a/be/src/core/data_type_serde/data_type_serde.cpp b/be/src/core/data_type_serde/data_type_serde.cpp index ac688ae6c307a3..728cafab3469fd 100644 --- a/be/src/core/data_type_serde/data_type_serde.cpp +++ b/be/src/core/data_type_serde/data_type_serde.cpp @@ -34,6 +34,54 @@ namespace doris { DataTypeSerDe::~DataTypeSerDe() = default; +bool decoded_column_view_can_null_on_conversion_failure(const DecodedColumnView& view) { + return !view.enable_strict_mode && view.conversion_failure_null_map != nullptr; +} + +void decoded_column_view_insert_null_on_conversion_failure(IColumn& column, + const DecodedColumnView& view, + int64_t row) { + DORIS_CHECK(decoded_column_view_can_null_on_conversion_failure(view)); + DORIS_CHECK(row >= 0); + DORIS_CHECK(row < view.row_count); + DORIS_CHECK(view.conversion_failure_null_map_offset >= 0); + const auto null_map_row = view.conversion_failure_null_map_offset + row; + DORIS_CHECK(null_map_row >= 0); + DORIS_CHECK(static_cast(null_map_row) < view.conversion_failure_null_map->size()); + column.insert_default(); + (*view.conversion_failure_null_map)[null_map_row] = 1; +} + +Status decoded_column_view_handle_conversion_failure(IColumn& column, const DecodedColumnView& view, + const Status& status) { + if (!decoded_column_view_can_null_on_conversion_failure(view)) { + return status; + } + for (int64_t row = 0; row < view.row_count; ++row) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + } + return Status::OK(); +} + +Status DataTypeSerDe::read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const { + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("read_column_from_decoded_values is not supported for {}", + get_name())); +} + +Status DataTypeSerDe::read_field_from_decoded_value(const IDataType& data_type, Field* field, + const DecodedColumnView& view) const { + DORIS_CHECK(field != nullptr); + DORIS_CHECK(view.row_count == 1); + auto column = data_type.create_column(); + RETURN_IF_ERROR(read_column_from_decoded_values(*column, view)); + DORIS_CHECK(column->size() == 1); + column->get(0, *field); + return Status::OK(); +} + DataTypeSerDeSPtrs create_data_type_serdes(const DataTypes& types) { DataTypeSerDeSPtrs serdes; serdes.reserve(types.size()); diff --git a/be/src/core/data_type_serde/data_type_serde.h b/be/src/core/data_type_serde/data_type_serde.h index eb7ce74fbe7e9c..baab90ea8d2a82 100644 --- a/be/src/core/data_type_serde/data_type_serde.h +++ b/be/src/core/data_type_serde/data_type_serde.h @@ -27,6 +27,7 @@ #include "common/cast_set.h" #include "common/status.h" #include "core/column/column_nullable.h" +#include "core/data_type_serde/decoded_column_view.h" #include "core/field.h" #include "core/string_buffer.hpp" #include "core/types.h" @@ -485,6 +486,14 @@ class DataTypeSerDe { int64_t start, int64_t end, const cctz::time_zone& ctz) const = 0; + // Read already decoded column values into a Doris column. The input view is format-neutral: + // file readers translate their decoder output into DecodedColumnView, while SerDe owns + // the Doris-type-specific materialization into IColumn. + virtual Status read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const; + virtual Status read_field_from_decoded_value(const IDataType& data_type, Field* field, + const DecodedColumnView& view) const; + // ORC serializer virtual Status write_column_to_orc(const std::string& timezone, const IColumn& column, const NullMap* null_map, diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp b/be/src/core/data_type_serde/data_type_string_serde.cpp index dc7667fefcaf48..4c7c9d02475bf0 100644 --- a/be/src/core/data_type_serde/data_type_string_serde.cpp +++ b/be/src/core/data_type_serde/data_type_string_serde.cpp @@ -22,11 +22,40 @@ #include "core/column/column_string.h" #include "core/data_type/define_primitive_type.h" +#include "core/data_type_serde/decoded_column_view.h" #include "util/jsonb_document_cast.h" #include "util/jsonb_utils.h" #include "util/jsonb_writer.h" namespace doris { +namespace { + +template +Status read_string_decoded_values(IColumn& column, const DecodedColumnView& view) { + if (view.binary_values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded binary values are null for {}", column.get_name()); + } + auto& string_column = assert_cast(column); + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + string_column.insert_default(); + continue; + } + const auto& value = (*view.binary_values)[row]; + if (value.data == nullptr && value.size > 0) { + if (decoded_column_view_can_null_on_conversion_failure(view)) { + decoded_column_view_insert_null_on_conversion_failure(column, view, row); + continue; + } + return Status::Corruption("Decoded string binary value is null for {} at row {}", + column.get_name(), row); + } + string_column.insert_data(value.data, value.size); + } + return Status::OK(); +} + +} // namespace namespace { @@ -429,6 +458,19 @@ Status DataTypeStringSerDeBase::read_column_from_arrow( return Status::OK(); } +template +Status DataTypeStringSerDeBase::read_column_from_decoded_values( + IColumn& column, const DecodedColumnView& view) const { + if (view.value_kind != DecodedValueKind::BINARY && + view.value_kind != DecodedValueKind::FIXED_BINARY) { + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("Unsupported decoded values for {} from source kind {}", + get_name(), static_cast(view.value_kind))); + } + return read_string_decoded_values(column, view); +} + template Status DataTypeStringSerDeBase::write_column_to_orc( const std::string& timezone, const IColumn& column, const NullMap* null_map, diff --git a/be/src/core/data_type_serde/data_type_string_serde.h b/be/src/core/data_type_serde/data_type_string_serde.h index 79c8450835d39c..81b80eab4a5cbf 100644 --- a/be/src/core/data_type_serde/data_type_string_serde.h +++ b/be/src/core/data_type_serde/data_type_string_serde.h @@ -203,6 +203,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; + Status read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const override; + Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result, int64_t row_idx, bool col_const, const FormatOptions& options) const override { diff --git a/be/src/core/data_type_serde/data_type_time_serde.cpp b/be/src/core/data_type_serde/data_type_time_serde.cpp index e57fd08a271339..c40e671793c848 100644 --- a/be/src/core/data_type_serde/data_type_time_serde.cpp +++ b/be/src/core/data_type_serde/data_type_time_serde.cpp @@ -20,11 +20,38 @@ #include "core/data_type/data_type_decimal.h" #include "core/data_type/data_type_number.h" #include "core/data_type/primitive_type.h" +#include "core/data_type_serde/decoded_column_view.h" #include "core/value/time_value.h" #include "exprs/function/cast/cast_base.h" #include "exprs/function/cast/cast_to_time_impl.hpp" namespace doris { +namespace { + +TimeValue::TimeType read_time_decoded_value(const DecodedColumnView& view, int64_t row) { + int64_t micros = 0; + if (view.value_kind == DecodedValueKind::INT32) { + const auto* values = reinterpret_cast(view.values); + micros = static_cast(values[row]) * 1000; + } else { + const auto* values = reinterpret_cast(view.values); + micros = values[row]; + if (view.time_unit == DecodedTimeUnit::MILLIS) { + micros *= 1000; + } else if (view.time_unit == DecodedTimeUnit::NANOS) { + micros /= 1000; + } + } + const bool negative = micros < 0; + const int64_t abs_micros = std::abs(micros); + return TimeValue::make_time( + abs_micros / TimeValue::ONE_HOUR_MICROSECONDS, + (abs_micros % TimeValue::ONE_HOUR_MICROSECONDS) / TimeValue::ONE_MINUTE_MICROSECONDS, + (abs_micros % TimeValue::ONE_MINUTE_MICROSECONDS) / TimeValue::ONE_SECOND_MICROSECONDS, + abs_micros % TimeValue::ONE_SECOND_MICROSECONDS, negative); +} + +} // namespace Status DataTypeTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result, @@ -145,6 +172,27 @@ Status DataTypeTimeV2SerDe::from_string_strict_mode(StringRef& str, IColumn& col return Status::OK(); } +Status DataTypeTimeV2SerDe::read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const { + if (view.value_kind != DecodedValueKind::INT32 && view.value_kind != DecodedValueKind::INT64) { + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("TIMEV2 decoded reader expects INT32 or INT64 source")); + } + if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded value buffer is null for {}", column.get_name()); + } + auto& data = assert_cast(column).get_data(); + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(TimeValue::TimeType()); + continue; + } + data.push_back(read_time_decoded_value(view, row)); + } + return Status::OK(); +} + template Status DataTypeTimeV2SerDe::from_int_batch(const typename IntDataType::ColumnType& int_col, ColumnNullable& target_col) const { diff --git a/be/src/core/data_type_serde/data_type_time_serde.h b/be/src/core/data_type_serde/data_type_time_serde.h index db703616b497cf..e3fccf379c913a 100644 --- a/be/src/core/data_type_serde/data_type_time_serde.h +++ b/be/src/core/data_type_serde/data_type_time_serde.h @@ -67,6 +67,8 @@ class DataTypeTimeV2SerDe : public DataTypeNumberSerDe Status from_decimal_strict_mode_batch(const typename DecimalDataType::ColumnType& decimal_col, IColumn& target_col) const; + Status read_column_from_decoded_values(IColumn& column, + const DecodedColumnView& view) const override; int get_scale() const override { return _scale; } protected: diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp index e8c26f6db68e75..abc8b86700023a 100644 --- a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp +++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp @@ -18,14 +18,64 @@ #include "core/data_type_serde/data_type_timestamptz_serde.h" #include +#include #include "core/data_type/primitive_type.h" +#include "core/data_type_serde/decoded_column_view.h" #include "core/value/timestamptz_value.h" #include "exprs/function/cast/cast_parameters.h" #include "exprs/function/cast/cast_to_string.h" #include "exprs/function/cast/cast_to_timestamptz.h" namespace doris { +namespace { + +#pragma pack(1) +struct DecodedInt96Timestamp { + int64_t nanos_of_day; + int32_t julian_day; + + int64_t to_timestamp_micros() const { + static constexpr int32_t JULIAN_EPOCH_OFFSET_DAYS = 2440588; + static constexpr int64_t MICROS_IN_DAY = 86400000000; + static constexpr int64_t NANOS_PER_MICROSECOND = 1000; + return (julian_day - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + + nanos_of_day / NANOS_PER_MICROSECOND; + } +}; +#pragma pack() +static_assert(sizeof(DecodedInt96Timestamp) == 12); + +void append_timestamptz_from_utc_epoch_micros(ColumnTimeStampTz::Container& data, + int64_t timestamp_micros) { + static constexpr int64_t MICROS_PER_SECOND = 1000000; + static const auto UTC = cctz::utc_time_zone(); + + int64_t epoch_seconds = timestamp_micros / MICROS_PER_SECOND; + int64_t micros_of_second = timestamp_micros % MICROS_PER_SECOND; + if (micros_of_second < 0) { + micros_of_second += MICROS_PER_SECOND; + --epoch_seconds; + } + + TimestampTzValue timestamp_tz; + timestamp_tz.from_unixtime(epoch_seconds, UTC); + timestamp_tz.set_microsecond(static_cast(micros_of_second)); + data.push_back(timestamp_tz); +} + +int64_t decoded_timestamp_micros(const DecodedColumnView& view, int64_t value) { + if (view.time_unit == DecodedTimeUnit::MILLIS) { + return value * 1000; + } + if (view.time_unit == DecodedTimeUnit::NANOS) { + return value / 1000; + } + return value; +} + +} // namespace + // The implementation of these functions mainly refers to data_type_datetimev2_serde.cpp Status DataTypeTimeStampTzSerDe::from_string(StringRef& str, IColumn& column, @@ -246,6 +296,41 @@ Status DataTypeTimeStampTzSerDe::write_column_to_orc(const std::string& timezone return Status::OK(); } +Status DataTypeTimeStampTzSerDe::read_column_from_decoded_values( + IColumn& column, const DecodedColumnView& view) const { + if (view.value_kind != DecodedValueKind::INT64 && view.value_kind != DecodedValueKind::INT96) { + return decoded_column_view_handle_conversion_failure( + column, view, + Status::NotSupported("TIMESTAMPTZ decoded reader expects INT64 or INT96 source")); + } + if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) { + return Status::Corruption("Decoded value buffer is null for {}", column.get_name()); + } + + auto& data = assert_cast(column).get_data(); + if (view.value_kind == DecodedValueKind::INT96) { + const auto* values = reinterpret_cast(view.values); + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(TimestampTzValue()); + continue; + } + append_timestamptz_from_utc_epoch_micros(data, values[row].to_timestamp_micros()); + } + return Status::OK(); + } + + const auto* values = reinterpret_cast(view.values); + for (int64_t row = 0; row < view.row_count; ++row) { + if (decoded_column_view_row_is_null(view, row)) { + data.push_back(TimestampTzValue()); + continue; + } + append_timestamptz_from_utc_epoch_micros(data, decoded_timestamp_micros(view, values[row])); + } + return Status::OK(); +} + std::string DataTypeTimeStampTzSerDe::to_olap_string(const Field& field) const { return CastToString::from_timestamptz(field.get(), 6); } diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h b/be/src/core/data_type_serde/data_type_timestamptz_serde.h index 0a595935d8fdd6..133e37fed33b03 100644 --- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h +++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h @@ -22,6 +22,7 @@ #include #include "core/data_type_serde/data_type_number_serde.h" +#include "core/data_type_serde/decoded_column_view.h" #include "core/types.h" #include "core/value/time_value.h" @@ -72,6 +73,9 @@ class DataTypeTimeStampTzSerDe : public DataTypeNumberSerDe +#include +#include + +#include "common/status.h" +#include "core/column/column_nullable.h" +#include "core/string_ref.h" + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { + +class IColumn; + +// 已解码 column batch 的物理值来源类型。 +// 该枚举只描述通用内存布局,不包含 Parquet/ORC/Arrow 等格式专有类型。 +enum class DecodedValueKind { + BOOL, + INT32, + UINT32, + INT64, + UINT64, + INT96, + FLOAT, + DOUBLE, + BINARY, + FIXED_BINARY, +}; + +enum class DecodedTimeUnit { + UNKNOWN, + MILLIS, + MICROS, + NANOS, +}; + +struct DecodedColumnView { + DecodedValueKind value_kind = DecodedValueKind::INT32; + DecodedTimeUnit time_unit = DecodedTimeUnit::UNKNOWN; + int64_t row_count = 0; + // Optional logical integer annotation. value_kind still describes the physical buffer layout. + int logical_integer_bit_width = -1; + int decimal_precision = -1; + int decimal_scale = -1; + int fixed_length = -1; + bool logical_integer_is_signed = true; + bool timestamp_is_adjusted_to_utc = false; + const uint8_t* values = nullptr; + const uint8_t* null_map = nullptr; + const std::vector* binary_values = nullptr; + const cctz::time_zone* timezone = nullptr; + bool enable_strict_mode = false; + NullMap* conversion_failure_null_map = nullptr; + int64_t conversion_failure_null_map_offset = 0; +}; + +inline bool decoded_column_view_row_is_null(const DecodedColumnView& view, int64_t row) { + return view.null_map != nullptr && view.null_map[row] != 0; +} + +inline bool decoded_column_view_has_non_null_value(const DecodedColumnView& view) { + if (view.null_map == nullptr) { + return view.row_count > 0; + } + + // TODO(gabriel): optimize null map check with SIMD or bitset if needed. + for (int64_t row = 0; row < view.row_count; ++row) { + if (view.null_map[row] == 0) { + return true; + } + } + return false; +} + +bool decoded_column_view_can_null_on_conversion_failure(const DecodedColumnView& view); + +void decoded_column_view_insert_null_on_conversion_failure(IColumn& column, + const DecodedColumnView& view, + int64_t row); + +Status decoded_column_view_handle_conversion_failure(IColumn& column, const DecodedColumnView& view, + const Status& status); + +} // namespace doris diff --git a/be/src/exec/operator/file_scan_operator.cpp b/be/src/exec/operator/file_scan_operator.cpp index 2a87f413a15bd6..d4035d37e27106 100644 --- a/be/src/exec/operator/file_scan_operator.cpp +++ b/be/src/exec/operator/file_scan_operator.cpp @@ -24,6 +24,7 @@ #include "exec/operator/olap_scan_operator.h" #include "exec/operator/scan_operator.h" #include "exec/scan/file_scanner.h" +#include "exec/scan/file_scanner_v2.h" #include "exec/scan/scanner_context.h" #include "format/format_common.h" #include "storage/storage_engine.h" @@ -119,10 +120,32 @@ Status FileScanLocalState::_init_scanners(std::list* scanners) { _max_scanners); shard_num = std::max(shard_num, 1U); _kv_cache = std::make_unique(shard_num); + const TFileScanRangeParams* scan_params = nullptr; + if (state()->get_query_ctx() != nullptr && + state()->get_query_ctx()->file_scan_range_params_map.count(parent_id()) > 0) { + scan_params = &state()->get_query_ctx()->file_scan_range_params_map[parent_id()]; + } else { + scan_params = _split_source->get_params(); + } + const bool is_load = + state()->desc_tbl().get_tuple_descriptor(scan_params->src_tuple_id) != nullptr; + // TODO: Use scanner v2 for all queries. + const bool use_file_scanner_v2 = + state()->query_options().__isset.enable_file_scanner_v2 && + state()->query_options().enable_file_scanner_v2 && !is_load && + _split_source->all_scan_ranges_match(*scan_params, FileScannerV2::is_supported); + _operator_profile->add_info_string("UseScannerV2", use_file_scanner_v2 ? "true" : "false"); for (int i = 0; i < _max_scanners; ++i) { - std::unique_ptr scanner = FileScanner::create_unique( - state(), this, p._limit, _split_source, _scanner_profile.get(), _kv_cache.get(), - &p._colname_to_slot_id); + ScannerSPtr scanner; + if (use_file_scanner_v2) { + scanner = FileScannerV2::create_shared(state(), this, p._limit, _split_source, + _scanner_profile.get(), _kv_cache.get(), + &p._colname_to_slot_id); + } else { + scanner = FileScanner::create_shared(state(), this, p._limit, _split_source, + _scanner_profile.get(), _kv_cache.get(), + &p._colname_to_slot_id); + } RETURN_IF_ERROR(scanner->init(state(), _conjuncts)); scanners->push_back(std::move(scanner)); } diff --git a/be/src/exec/operator/file_scan_operator.h b/be/src/exec/operator/file_scan_operator.h index d4e31195a4459a..c47488fa357c77 100644 --- a/be/src/exec/operator/file_scan_operator.h +++ b/be/src/exec/operator/file_scan_operator.h @@ -29,6 +29,7 @@ namespace doris { class FileScanner; +class FileScannerV2; } // namespace doris namespace doris { @@ -56,6 +57,7 @@ class FileScanLocalState final : public ScanLocalState { private: friend class FileScanner; + friend class FileScannerV2; PushDownType _should_push_down_bloom_filter() const override { return PushDownType::UNACCEPTABLE; } diff --git a/be/src/exec/operator/result_sink_operator.h b/be/src/exec/operator/result_sink_operator.h index 4ead2985d85162..7ff4b18c9b2817 100644 --- a/be/src/exec/operator/result_sink_operator.h +++ b/be/src/exec/operator/result_sink_operator.h @@ -45,7 +45,7 @@ struct ResultFileOptions { TParquetCompressionType::type parquet_commpression_type; TParquetVersion::type parquet_version; bool parquert_disable_dictionary = false; - bool enable_int96_timestamps = false; + bool enable_int96_timestamps = true; //note: use outfile with parquet format, have deprecated 9:schema and 10:file_properties //But in order to consider the compatibility when upgrading, so add a bool to check //Now the code version is 1.1.2, so when the version is after 1.2, could remove this code. diff --git a/be/src/exec/scan/access_path_parser.cpp b/be/src/exec/scan/access_path_parser.cpp new file mode 100644 index 00000000000000..b215212b6d861b --- /dev/null +++ b/be/src/exec/scan/access_path_parser.cpp @@ -0,0 +1,479 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/scan/access_path_parser.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" +#include "runtime/descriptors.h" +#include "util/string_util.h" + +namespace doris { +namespace { + +bool is_scanner_materialized_virtual_column(const std::string& column_name) { + return column_name == BeConsts::ICEBERG_ROWID_COL; +} + +bool parse_non_negative_int(std::string_view value, int32_t* result) { + DORIS_CHECK(result != nullptr); + int32_t parsed = -1; + const auto* begin = value.data(); + const auto* end = begin + value.size(); + const auto [ptr, ec] = std::from_chars(begin, end, parsed); + if (ec != std::errc() || ptr != end || parsed < 0) { + return false; + } + *result = parsed; + return true; +} + +std::string access_path_to_string(const std::vector& path) { + return fmt::format("{}", fmt::join(path, ".")); +} + +format::ColumnDefinition* find_or_add_child(format::ColumnDefinition* parent, int32_t id, + std::string name, DataTypePtr type) { + DORIS_CHECK(parent != nullptr); + for (auto& child : parent->children) { + if ((child.has_identifier_field_id() && child.get_identifier_field_id() == id) || + child.name == name) { + return &child; + } + } + parent->children.push_back({ + .identifier = Field::create_field(id), + .name = std::move(name), + .type = std::move(type), + .children = {}, + .default_expr = nullptr, + .is_partition_key = false, + }); + return &parent->children.back(); +} + +void inherit_schema_metadata(format::ColumnDefinition* column, + const format::ColumnDefinition* schema_column) { + if (column == nullptr || schema_column == nullptr) { + return; + } + column->name_mapping = schema_column->name_mapping; +} + +const format::ColumnDefinition* find_schema_child_by_path( + const format::ColumnDefinition* schema_column, const std::string& child_path) { + if (schema_column == nullptr) { + return nullptr; + } + int32_t parsed_field_id = -1; + if (parse_non_negative_int(child_path, &parsed_field_id)) { + const auto child_it = std::ranges::find_if( + schema_column->children, [&](const format::ColumnDefinition& child) { + return child.has_identifier_field_id() && + child.get_identifier_field_id() == parsed_field_id; + }); + return child_it == schema_column->children.end() ? nullptr : &*child_it; + } + const auto child_it = std::ranges::find_if(schema_column->children, [&](const auto& child) { + if (to_lower(child.name) == to_lower(child_path)) { + return true; + } + return std::ranges::any_of(child.name_mapping, [&](const std::string& alias) { + return to_lower(alias) == to_lower(child_path); + }); + }); + return child_it == schema_column->children.end() ? nullptr : &*child_it; +} + +int32_t schema_field_id(const format::ColumnDefinition* schema_column) { + if (schema_column == nullptr || !schema_column->has_identifier_field_id()) { + return -1; + } + return schema_column->get_identifier_field_id(); +} + +int32_t schema_field_id_or(const format::ColumnDefinition* schema_column, int32_t fallback) { + const auto field_id = schema_field_id(schema_column); + return field_id >= 0 ? field_id : fallback; +} + +std::string schema_field_name_or(const format::ColumnDefinition* schema_column, + std::string fallback) { + return schema_column == nullptr || schema_column->name.empty() ? std::move(fallback) + : schema_column->name; +} + +struct AccessPathNode { + bool project_all = false; + std::map children; +}; + +void merge_access_path_node(AccessPathNode* dst, const AccessPathNode& src) { + DORIS_CHECK(dst != nullptr); + if (dst->project_all) { + return; + } + if (src.project_all) { + dst->project_all = true; + dst->children.clear(); + return; + } + for (const auto& [path, child] : src.children) { + merge_access_path_node(&dst->children[path], child); + } +} + +void insert_access_path(AccessPathNode* root, const std::vector& path, + size_t path_idx) { + DORIS_CHECK(root != nullptr); + if (root->project_all) { + return; + } + if (path_idx >= path.size()) { + root->project_all = true; + root->children.clear(); + return; + } + insert_access_path(&root->children[path[path_idx]], path, path_idx + 1); +} + +Status build_nested_children_from_access_node(format::ColumnDefinition* column, + const DataTypePtr& type, const AccessPathNode& node, + const std::string& path, + const format::ColumnDefinition* schema_column); + +// Expand a full complex-column projection into table-schema children when the table format provides +// an external/current schema. Without this, `SELECT complex_col` or `SELECT *` leaves +// ColumnDefinition::children empty, so ColumnMapper treats the root complex column as a scalar +// mapping and later tries to cast the old file shape to the current table shape directly. +// +// Examples: +// - STRUCT country/city projected from an old file STRUCT country/population/location should +// create children country and city, so city can be materialized as missing/default. +// - ARRAY> should create the array element wrapper and then the element +// struct children item and quantity. +// - MAP> should create semantic children key/value directly, then +// expand the value struct children full_name and age. Do not introduce a physical entries +// wrapper here: ColumnMapper and TableReader treat MAP children as [key, value]. +Status build_all_nested_children_from_schema(format::ColumnDefinition* column, + const DataTypePtr& type, const std::string& path, + const format::ColumnDefinition* schema_column) { + DORIS_CHECK(column != nullptr); + + const auto nested_type = remove_nullable(type); + AccessPathNode project_all; + project_all.project_all = true; + switch (nested_type->get_primitive_type()) { + case TYPE_STRUCT: { + const auto& struct_type = assert_cast(*nested_type); + for (size_t field_idx = 0; field_idx < struct_type.get_elements().size(); ++field_idx) { + const auto field_name = struct_type.get_element_name(field_idx); + const auto* schema_child = find_schema_child_by_path(schema_column, field_name); + auto* child = find_or_add_child( + column, schema_field_id_or(schema_child, cast_set(field_idx)), + schema_field_name_or(schema_child, field_name), + struct_type.get_element(field_idx)); + inherit_schema_metadata(child, schema_child); + RETURN_IF_ERROR(build_nested_children_from_access_node( + child, child->type, project_all, path + "." + child->name, schema_child)); + } + return Status::OK(); + } + case TYPE_ARRAY: { + const auto& array_type = assert_cast(*nested_type); + const auto* element_schema = schema_column != nullptr && !schema_column->children.empty() + ? &schema_column->children[0] + : nullptr; + auto* child = find_or_add_child(column, schema_field_id_or(element_schema, 0), "element", + array_type.get_nested_type()); + inherit_schema_metadata(child, element_schema); + return build_nested_children_from_access_node(child, child->type, project_all, path + ".*", + element_schema); + } + case TYPE_MAP: { + const auto& map_type = assert_cast(*nested_type); + const auto* key_schema = schema_column != nullptr && !schema_column->children.empty() + ? &schema_column->children[0] + : nullptr; + const auto* value_schema = schema_column != nullptr && schema_column->children.size() > 1 + ? &schema_column->children[1] + : nullptr; + auto* key_child = find_or_add_child(column, schema_field_id_or(key_schema, 0), "key", + map_type.get_key_type()); + inherit_schema_metadata(key_child, key_schema); + RETURN_IF_ERROR(build_nested_children_from_access_node( + key_child, key_child->type, project_all, path + ".KEYS", key_schema)); + auto* value_child = find_or_add_child(column, schema_field_id_or(value_schema, 1), "value", + map_type.get_value_type()); + inherit_schema_metadata(value_child, value_schema); + RETURN_IF_ERROR(build_nested_children_from_access_node( + value_child, value_child->type, project_all, path + ".VALUES", value_schema)); + return Status::OK(); + } + default: + return Status::OK(); + } +} + +Status build_struct_children_from_access_node(format::ColumnDefinition* column, + const DataTypeStruct& struct_type, + const AccessPathNode& node, const std::string& path, + const format::ColumnDefinition* schema_column) { + DORIS_CHECK(column != nullptr); + for (const auto& [child_path, child_node] : node.children) { + // Struct children are resolved by name or schema field id. We do not treat a numeric + // child token as a struct ordinal, because `col.0` becomes ambiguous once the struct + // evolves. Position-based access needs a separate design if it is required later. + if (child_path == "OFFSET" || child_path == "*" || child_path == "KEYS" || + child_path == "VALUES") { + return Status::NotSupported( + "AccessPathParser does not support access path {} for slot {}", + path + "." + child_path, column->name); + } + + // Prefer the table/schema ColumnDefinition because it carries field ids and aliases. + // Fallback to the struct type name only for formats without external schema metadata. + const auto* schema_child = find_schema_child_by_path(schema_column, child_path); + int32_t field_id = schema_field_id(schema_child); + std::string field_name = schema_child == nullptr ? child_path : schema_child->name; + DataTypePtr field_type = schema_child == nullptr ? nullptr : schema_child->type; + if (field_id < 0 || field_type == nullptr) { + for (size_t field_idx = 0; field_idx < struct_type.get_elements().size(); ++field_idx) { + if (to_lower(struct_type.get_element_name(field_idx)) == to_lower(field_name)) { + field_id = cast_set(field_idx); + field_name = struct_type.get_element_name(field_idx); + field_type = struct_type.get_element(field_idx); + break; + } + } + } + + if (field_id < 0 || field_type == nullptr) { + return Status::NotSupported( + "AccessPathParser does not support access path {} for slot {}", + path + "." + child_path, column->name); + } + // TODO: For TVF Parquet files without field ids, this fallback uses the struct ordinal as + // the table child identifier. BY_NAME mapping should instead keep a string identifier and + // let TableColumnMapper resolve the file-local child id from the Parquet schema. + auto* child = find_or_add_child(column, field_id, field_name, field_type); + inherit_schema_metadata(child, schema_child); + RETURN_IF_ERROR(build_nested_children_from_access_node( + child, child->type, child_node, path + "." + child_path, schema_child)); + } + return Status::OK(); +} + +Status build_map_children_from_access_node(format::ColumnDefinition* column, + const DataTypeMap& map_type, const AccessPathNode& node, + const std::string& path, + const format::ColumnDefinition* schema_column) { + DORIS_CHECK(column != nullptr); + AccessPathNode key_node; + AccessPathNode value_node; + bool need_key = false; + bool need_value = false; + + for (const auto& [child_path, child_node] : node.children) { + if (child_path == "OFFSET") { + return Status::NotSupported( + "AccessPathParser does not support access path {} for slot {}", + path + "." + child_path, column->name); + } + if (child_path == "KEYS") { + need_key = true; + merge_access_path_node(&key_node, child_node); + continue; + } + if (child_path == "VALUES") { + need_key = true; + key_node.project_all = true; + key_node.children.clear(); + need_value = true; + merge_access_path_node(&value_node, child_node); + continue; + } + if (child_path == "*") { + need_key = true; + key_node.project_all = true; + key_node.children.clear(); + need_value = true; + merge_access_path_node(&value_node, child_node); + continue; + } + return Status::NotSupported("AccessPathParser does not support access path {} for slot {}", + path + "." + child_path, column->name); + } + if (need_key && !need_value) { + // A key-only MAP projection is not independently materializable yet. FileScannerV2 can + // describe a projection such as `m.KEYS`, but the downstream file block -> table block path + // still builds a ColumnMap from key column + value column + offsets. If the value child is + // omitted here, TableReader/ColumnMapper cannot reconstruct a valid table MAP column even + // though the query only needs keys. + // + // Example: + // SELECT map_keys(m) FROM t; + // or + // SELECT * FROM t WHERE array_contains(map_keys(m), 'k1'); + // + // The access path only asks for `m.KEYS`, but the scan still has to read `m.VALUES` as a + // temporary full projection until map materialization supports constructing a table MAP + // from keys only. + need_value = true; + value_node.project_all = true; + value_node.children.clear(); + } + + if (!need_key && !need_value) { + return Status::OK(); + } + + const auto* key_schema = schema_column != nullptr && !schema_column->children.empty() + ? &schema_column->children[0] + : nullptr; + const auto* value_schema = schema_column != nullptr && schema_column->children.size() > 1 + ? &schema_column->children[1] + : nullptr; + if (need_key) { + auto* key_child = find_or_add_child(column, schema_field_id_or(key_schema, 0), "key", + map_type.get_key_type()); + inherit_schema_metadata(key_child, key_schema); + RETURN_IF_ERROR(build_nested_children_from_access_node(key_child, key_child->type, key_node, + path + ".KEYS", key_schema)); + } + if (need_value) { + auto* value_child = find_or_add_child(column, schema_field_id_or(value_schema, 1), "value", + map_type.get_value_type()); + inherit_schema_metadata(value_child, value_schema); + RETURN_IF_ERROR(build_nested_children_from_access_node( + value_child, value_child->type, value_node, path + ".VALUES", value_schema)); + } + return Status::OK(); +} + +Status build_nested_children_from_access_node(format::ColumnDefinition* column, + const DataTypePtr& type, const AccessPathNode& node, + const std::string& path, + const format::ColumnDefinition* schema_column) { + DORIS_CHECK(column != nullptr); + if (node.project_all || node.children.empty()) { + return build_all_nested_children_from_schema(column, type, path, schema_column); + } + + const auto nested_type = remove_nullable(type); + switch (nested_type->get_primitive_type()) { + case TYPE_STRUCT: + return build_struct_children_from_access_node( + column, assert_cast(*nested_type), node, path, + schema_column); + case TYPE_ARRAY: { + if (node.children.size() != 1 || !node.children.contains("*")) { + return Status::NotSupported( + "AccessPathParser does not support access path {} for slot {}", path, + column->name); + } + const auto& array_type = assert_cast(*nested_type); + const auto* element_schema = schema_column != nullptr && !schema_column->children.empty() + ? &schema_column->children[0] + : nullptr; + auto* child = find_or_add_child(column, schema_field_id_or(element_schema, 0), "element", + array_type.get_nested_type()); + inherit_schema_metadata(child, element_schema); + return build_nested_children_from_access_node(child, child->type, node.children.at("*"), + path + ".*", element_schema); + } + case TYPE_MAP: + return build_map_children_from_access_node( + column, assert_cast(*nested_type), node, path, schema_column); + default: + return Status::NotSupported("AccessPathParser does not support access path {} for slot {}", + path, column->name); + } +} + +} // namespace + +Status AccessPathParser::build_nested_children(format::ColumnDefinition* column, + const std::vector& access_paths, + const format::ColumnDefinition* schema_column) { + DORIS_CHECK(column != nullptr); + if (is_scanner_materialized_virtual_column(column->name)) { + return Status::OK(); + } + if (!is_complex_type(remove_nullable(column->type)->get_primitive_type())) { + return Status::OK(); + } + + AccessPathNode root; + // Build tree for AccessPathNode. + // For example, for access paths ["a.b", "a.c", "d"], the tree will be: + // root + // ├── a + // │ ├── b + // │ └── c + // └── d + for (const auto& access_path : access_paths) { + // TODO: Support META access paths if needed. Currently AccessPathParser only supports + // DATA access paths. + if (access_path.type != TAccessPathType::DATA || !access_path.__isset.data_access_path) { + return Status::NotSupported( + "AccessPathParser only supports DATA access paths for slot {}", column->name); + } + const auto& path = access_path.data_access_path.path; + if (path.empty()) { + insert_access_path(&root, path, 0); + continue; + } + int32_t top_level_id = -1; + if (to_lower(path.front()) != to_lower(column->name) && + (!parse_non_negative_int(path.front(), &top_level_id) || + !column->has_identifier_field_id() || + top_level_id != column->get_identifier_field_id())) { + return Status::NotSupported("AccessPathParser access path {} does not match slot {}", + access_path_to_string(path), column->name); + } + insert_access_path(&root, path, 1); + } + // Recursively build nested children for the column based on the AccessPathNode tree. + return build_nested_children_from_access_node(column, column->type, root, column->name, + schema_column); +} + +Status AccessPathParser::build_nested_children(format::ColumnDefinition* column, + const SlotDescriptor* slot_desc, + const format::ColumnDefinition* schema_column) { + DORIS_CHECK(column != nullptr); + DORIS_CHECK(slot_desc != nullptr); + return build_nested_children(column, slot_desc->all_access_paths(), schema_column); +} + +} // namespace doris diff --git a/be/src/exec/scan/access_path_parser.h b/be/src/exec/scan/access_path_parser.h new file mode 100644 index 00000000000000..1aa4c5b89d492a --- /dev/null +++ b/be/src/exec/scan/access_path_parser.h @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "format_v2/column_data.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris { + +class SlotDescriptor; + +class AccessPathParser { +public: + static Status build_nested_children(format::ColumnDefinition* column, + const SlotDescriptor* slot_desc, + const format::ColumnDefinition* schema_column); + + static Status build_nested_children(format::ColumnDefinition* column, + const std::vector& access_paths, + const format::ColumnDefinition* schema_column); +}; + +} // namespace doris diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index 6419ce4f65c5e2..6811efcdd5da6e 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -1078,8 +1078,31 @@ Status FileScanner::_get_next_reader() { _cur_reader = std::move(mc_reader); } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "paimon") { - if (_state->query_options().__isset.enable_paimon_cpp_reader && - _state->query_options().enable_paimon_cpp_reader) { + const auto& paimon_params = range.table_format_params.paimon_params; + bool use_paimon_cpp_reader = false; + if (paimon_params.__isset.reader_type) { + switch (paimon_params.reader_type) { + case TPaimonReaderType::PAIMON_CPP: + use_paimon_cpp_reader = true; + break; + case TPaimonReaderType::PAIMON_JNI: + break; + case TPaimonReaderType::PAIMON_NATIVE: + return Status::InternalError( + "invalid PAIMON_NATIVE reader_type for paimon FORMAT_JNI split, " + "possibly caused by FE/BE protocol mismatch"); + default: + return Status::InternalError( + "unknown paimon reader_type for paimon FORMAT_JNI split, possibly " + "caused by FE/BE protocol mismatch"); + } + } else { + // TODO: Remove this fallback after all FE versions set TPaimonReaderType. + use_paimon_cpp_reader = + _state->query_options().__isset.enable_paimon_cpp_reader && + _state->query_options().enable_paimon_cpp_reader; + } + if (use_paimon_cpp_reader) { auto cpp_reader = PaimonCppReader::create_unique(_file_slot_descs, _state, _profile, range, _params); if (!_is_load && !_push_down_conjuncts.empty()) { @@ -1771,7 +1794,6 @@ Status FileScanner::_init_expr_ctxes() { if (is_file_slot) { _is_file_slot.emplace(slot_id); _file_slot_descs.emplace_back(it->second); - _file_col_names.push_back(it->second->col_name()); } _column_descs.push_back(col_desc); diff --git a/be/src/exec/scan/file_scanner.h b/be/src/exec/scan/file_scanner.h index fbcbca464a5546..3675fd2449711e 100644 --- a/be/src/exec/scan/file_scanner.h +++ b/be/src/exec/scan/file_scanner.h @@ -135,8 +135,6 @@ class FileScanner : public Scanner { bool _cur_reader_eof = false; // File source slot descriptors std::vector _file_slot_descs; - // col names from _file_slot_descs - std::vector _file_col_names; // Unified column descriptors for init_reader (includes file, partition, missing, synthesized cols) std::vector _column_descs; @@ -149,6 +147,7 @@ class FileScanner : public Scanner { // dest slot name to index in _dest_vexpr_ctx; std::unordered_map _dest_slot_name_to_idx; // col name to default value expr + // TODO: only used by json reader. Could we delete this? std::unordered_map _col_default_value_ctx; // the map values of dest slot id to src slot desc // if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr @@ -195,7 +194,6 @@ class FileScanner : public Scanner { std::shared_ptr _io_ctx; // Whether to fill partition columns from path, default is true. - bool _fill_partition_from_path = true; std::unordered_map> _partition_col_descs; std::unordered_map _partition_value_is_null; diff --git a/be/src/exec/scan/file_scanner_v2.cpp b/be/src/exec/scan/file_scanner_v2.cpp new file mode 100644 index 00000000000000..57791cb2d85f2d --- /dev/null +++ b/be/src/exec/scan/file_scanner_v2.cpp @@ -0,0 +1,835 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/scan/file_scanner_v2.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/config.h" +#include "common/consts.h" +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/block/column_with_type_and_name.h" +#include "core/column/column.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type_serde/data_type_serde.h" +#include "core/string_ref.h" +#include "exec/common/util.hpp" +#include "exec/operator/scan_operator.h" +#include "exec/scan/access_path_parser.h" +#include "exprs/runtime_filter_expr.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "exprs/vslot_ref.h" +#include "format/format_common.h" +#include "format_v2/column_mapper.h" +#include "format_v2/jni/iceberg_sys_table_reader.h" +#include "format_v2/jni/jdbc_reader.h" +#include "format_v2/jni/max_compute_jni_reader.h" +#include "format_v2/jni/trino_connector_jni_reader.h" +#include "format_v2/table/hive_reader.h" +#include "format_v2/table/hudi_reader.h" +#include "format_v2/table/iceberg_reader.h" +#include "format_v2/table/paimon_reader.h" +#include "format_v2/table/remote_doris_reader.h" +#include "format_v2/table_reader.h" +#include "io/fs/file_meta_cache.h" +#include "io/io_common.h" +#include "runtime/descriptors.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "service/backend_options.h" +#include "storage/id_manager.h" + +namespace doris { +namespace { + +std::string table_format_name(const TFileRangeDesc& range) { + return range.__isset.table_format_params ? range.table_format_params.table_format_type + : "NotSet"; +} + +TFileFormatType::type get_range_format_type(const TFileScanRangeParams& params, + const TFileRangeDesc& range) { + return range.__isset.format_type ? range.format_type : params.format_type; +} + +bool is_supported_table_format(const TFileRangeDesc& range) { + const auto table_format = table_format_name(range); + if (table_format == "hudi" && range.__isset.table_format_params && + range.table_format_params.__isset.hudi_params && + range.table_format_params.hudi_params.__isset.delta_logs && + !range.table_format_params.hudi_params.delta_logs.empty()) { + // Hudi MOR splits need log-file merge semantics and must stay on the existing JNI path. + // FileScannerV2 currently supports native Parquet data files only. + return false; + } + return table_format == "NotSet" || table_format == "tvf" || table_format == "hive" || + table_format == "iceberg" || table_format == "paimon" || table_format == "hudi"; +} + +bool is_supported_arrow_table_format(const TFileRangeDesc& range) { + return table_format_name(range) == "remote_doris"; +} + +bool is_supported_jni_table_format(const TFileRangeDesc& range) { + const auto table_format = table_format_name(range); + if (table_format == "paimon") { + return range.__isset.table_format_params && + range.table_format_params.__isset.paimon_params && + range.table_format_params.paimon_params.__isset.reader_type && + range.table_format_params.paimon_params.reader_type == TPaimonReaderType::PAIMON_JNI; + } + return table_format == "jdbc" || table_format == "iceberg" || table_format == "hudi" || + table_format == "max_compute" || table_format == "trino_connector"; +} + +bool is_csv_format(TFileFormatType::type format_type) { + switch (format_type) { + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_CSV_GZ: + case TFileFormatType::FORMAT_CSV_BZ2: + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + case TFileFormatType::FORMAT_CSV_LZ4BLOCK: + case TFileFormatType::FORMAT_CSV_LZOP: + case TFileFormatType::FORMAT_CSV_DEFLATE: + case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK: + case TFileFormatType::FORMAT_PROTO: + return true; + default: + return false; + } +} + +bool is_text_format(TFileFormatType::type format_type) { + return format_type == TFileFormatType::FORMAT_TEXT; +} + +bool is_json_format(TFileFormatType::type format_type) { + return format_type == TFileFormatType::FORMAT_JSON; +} + +bool is_native_format(TFileFormatType::type format_type) { + return format_type == TFileFormatType::FORMAT_NATIVE; +} + +bool is_partition_slot(const TFileScanSlotInfo& slot_info, const std::string& column_name) { + if (column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) || + column_name == BeConsts::ICEBERG_ROWID_COL) { + return false; + } + return slot_info.__isset.category ? slot_info.category == TColumnCategory::PARTITION_KEY + : !slot_info.is_file_slot; +} + +bool is_data_file_slot(const TFileScanSlotInfo& slot_info, const std::string& column_name) { + if (column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) || + column_name == BeConsts::ICEBERG_ROWID_COL) { + return false; + } + // CSV and other non-self-describing formats need FE slot descriptors for only the columns that + // are physically read from the file. Partition/default/virtual columns stay in TableReader's + // mapping layer and are materialized after the file-local block is read. New FE provides an + // explicit category; old FE falls back to `is_file_slot`. + if (slot_info.__isset.category) { + return slot_info.category == TColumnCategory::REGULAR || + slot_info.category == TColumnCategory::GENERATED; + } + return slot_info.is_file_slot; +} + +Status rewrite_slot_refs_to_global_index( + VExprSPtr* expr, + const std::unordered_map& slot_id_to_global_index) { + DORIS_CHECK(expr != nullptr); + if (*expr == nullptr) { + return Status::OK(); + } + if (auto* runtime_filter = dynamic_cast(expr->get()); + runtime_filter != nullptr) { + auto impl = runtime_filter->get_impl(); + DORIS_CHECK(impl != nullptr); + RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&impl, slot_id_to_global_index)); + runtime_filter->set_impl(std::move(impl)); + return Status::OK(); + } + if ((*expr)->is_slot_ref()) { + const auto* slot_ref = assert_cast(expr->get()); + const auto global_index_it = slot_id_to_global_index.find(slot_ref->slot_id()); + if (global_index_it == slot_id_to_global_index.end()) { + DORIS_CHECK(slot_ref->slot_id() >= 0); + const auto global_index = format::GlobalIndex(cast_set(slot_ref->slot_id())); + *expr = VSlotRef::create_shared(cast_set(global_index.value()), + cast_set(global_index.value()), -1, + slot_ref->data_type(), slot_ref->column_name()); + RETURN_IF_ERROR(expr->get()->prepare(nullptr, RowDescriptor(), nullptr)); + return Status::OK(); + } + const auto global_index = global_index_it->second; + *expr = VSlotRef::create_shared(cast_set(global_index.value()), + cast_set(global_index.value()), -1, + slot_ref->data_type(), slot_ref->column_name()); + RETURN_IF_ERROR(expr->get()->prepare(nullptr, RowDescriptor(), nullptr)); + return Status::OK(); + } + auto children = (*expr)->children(); + for (auto& child : children) { + if (child == nullptr) { + continue; + } + RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&child, slot_id_to_global_index)); + } + (*expr)->set_children(std::move(children)); + return Status::OK(); +} + +} // namespace + +#ifdef BE_TEST +Status FileScannerV2::TEST_to_file_format(TFileFormatType::type format_type, + format::FileFormat* file_format) { + return _to_file_format(format_type, file_format); +} + +bool FileScannerV2::TEST_is_partition_slot(const TFileScanSlotInfo& slot_info, + const std::string& column_name) { + return is_partition_slot(slot_info, column_name); +} + +bool FileScannerV2::TEST_is_data_file_slot(const TFileScanSlotInfo& slot_info, + const std::string& column_name) { + return is_data_file_slot(slot_info, column_name); +} + +Status FileScannerV2::TEST_rewrite_slot_refs_to_global_index( + VExprSPtr* expr, + const std::unordered_map& slot_id_to_global_index) { + return rewrite_slot_refs_to_global_index(expr, slot_id_to_global_index); +} +#endif + +bool FileScannerV2::is_supported(const TFileScanRangeParams& params, const TFileRangeDesc& range) { + const auto format_type = get_range_format_type(params, range); + if (format_type == TFileFormatType::FORMAT_PARQUET) { + return is_supported_table_format(range); + } else if (format_type == TFileFormatType::FORMAT_ARROW) { + return is_supported_arrow_table_format(range); + } else if (format_type == TFileFormatType::FORMAT_JNI) { + return is_supported_jni_table_format(range); + } else if (is_csv_format(format_type) || is_text_format(format_type) || + is_json_format(format_type) || is_native_format(format_type)) { + return is_supported_table_format(range); + } else { + LOG(WARNING) << "Unsupported file format type " << format_type << " for file scanner v2"; + return false; + } +} + +FileScannerV2::FileScannerV2(RuntimeState* state, FileScanLocalState* local_state, int64_t limit, + std::shared_ptr split_source, + RuntimeProfile* profile, ShardedKVCache* kv_cache, + const std::unordered_map* colname_to_slot_id) + : Scanner(state, local_state, limit, profile), + _split_source(std::move(split_source)), + _kv_cache(kv_cache) { + (void)colname_to_slot_id; + if (state->get_query_ctx() != nullptr && + state->get_query_ctx()->file_scan_range_params_map.count(local_state->parent_id()) > 0) { + _params = &(state->get_query_ctx()->file_scan_range_params_map[local_state->parent_id()]); + } else { + _params = _split_source->get_params(); + } +} + +Status FileScannerV2::init(RuntimeState* state, const VExprContextSPtrs& conjuncts) { + RETURN_IF_ERROR(Scanner::init(state, conjuncts)); + _get_block_timer = + ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileScannerV2GetBlockTime", 1); + _file_counter = + ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "FileNumber", TUnit::UNIT, 1); + _file_read_bytes_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), + "FileReadBytes", TUnit::BYTES, 1); + _file_read_calls_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), + "FileReadCalls", TUnit::UNIT, 1); + _file_read_time_counter = + ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileReadTime", 1); + _adaptive_batch_predicted_rows_counter = ADD_COUNTER_WITH_LEVEL( + _local_state->scanner_profile(), "AdaptiveBatchPredictedRows", TUnit::UNIT, 1); + _adaptive_batch_actual_bytes_counter = ADD_COUNTER_WITH_LEVEL( + _local_state->scanner_profile(), "AdaptiveBatchActualBytes", TUnit::BYTES, 1); + _adaptive_batch_probe_count_counter = ADD_COUNTER_WITH_LEVEL( + _local_state->scanner_profile(), "AdaptiveBatchProbeCount", TUnit::UNIT, 1); + _file_cache_statistics = std::make_unique(); + _file_reader_stats = std::make_unique(); + RETURN_IF_ERROR(_init_io_ctx()); + _io_ctx->file_cache_stats = _file_cache_statistics.get(); + _io_ctx->file_reader_stats = _file_reader_stats.get(); + _io_ctx->is_disposable = _state->query_options().disable_file_cache; + return Status::OK(); +} + +Status FileScannerV2::_open_impl(RuntimeState* state) { + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(Scanner::_open_impl(state)); + RETURN_IF_ERROR(_split_source->get_next(&_first_scan_range, &_current_range)); + if (_first_scan_range) { + RETURN_IF_ERROR(_create_table_reader_for_format(_current_range, &_table_reader)); + DORIS_CHECK(_table_reader != nullptr); + RETURN_IF_ERROR(_init_expr_ctxes()); + RETURN_IF_ERROR(_init_table_reader(_current_range)); + } + return Status::OK(); +} + +Status FileScannerV2::_get_block_impl(RuntimeState* state, Block* block, bool* eof) { + while (true) { + RETURN_IF_CANCELLED(state); + if (!_has_prepared_split) { + RETURN_IF_ERROR(_prepare_next_split(eof)); + if (*eof) { + return Status::OK(); + } + } + + { + SCOPED_TIMER(_get_block_timer); + if (_should_run_adaptive_batch_size()) { + _table_reader->set_batch_size(_predict_reader_batch_rows()); + } + RETURN_IF_ERROR(_table_reader->get_block(block, eof)); + } + if (*eof) { + _state->update_num_finished_scan_range(1); + _has_prepared_split = false; + *eof = false; + continue; + } + _update_adaptive_batch_size(*block); + return Status::OK(); + } +} + +Status FileScannerV2::_prepare_next_split(bool* eos) { + bool has_next = _first_scan_range; + if (!_first_scan_range) { + RETURN_IF_ERROR(_split_source->get_next(&has_next, &_current_range)); + } + _first_scan_range = false; + if (!has_next || _should_stop) { + *eos = true; + return Status::OK(); + } + DORIS_CHECK(_table_reader != nullptr); + _current_range_path = _current_range.path; + _init_adaptive_batch_size_state(get_range_format_type(*_params, _current_range)); + RETURN_IF_ERROR(_prepare_table_reader_split(_current_range)); + COUNTER_UPDATE(_file_counter, 1); + _has_prepared_split = true; + *eos = false; + return Status::OK(); +} + +Status FileScannerV2::_init_table_reader(const TFileRangeDesc& range) { + const auto format_type = get_range_format_type(*_params, range); + format::FileFormat file_format; + RETURN_IF_ERROR(_to_file_format(format_type, &file_format)); + DORIS_CHECK(_table_reader != nullptr); + + format::TableColumnPredicates table_column_predicates; + RETURN_IF_ERROR(_build_table_column_predicates(&table_column_predicates)); + VExprContextSPtrs table_conjuncts; + RETURN_IF_ERROR(_build_table_conjuncts(&table_conjuncts)); + RETURN_IF_ERROR(_table_reader->init({ + .projected_columns = _projected_columns, + .column_predicates = std::move(table_column_predicates), + .conjuncts = std::move(table_conjuncts), + .format = file_format, + .scan_params = const_cast(_params), + .io_ctx = _io_ctx, + .runtime_state = _state, + .scanner_profile = _local_state->scanner_profile(), + .file_slot_descs = &_file_slot_descs, + .push_down_agg_type = _local_state->get_push_down_agg_type(), + .condition_cache_digest = _local_state->get_condition_cache_digest(), + })); + return Status::OK(); +} + +Status FileScannerV2::_create_table_reader_for_format( + const TFileRangeDesc& range, std::unique_ptr* reader) const { + DORIS_CHECK(reader != nullptr); + const auto table_format = table_format_name(range); + if (table_format == "NotSet" || table_format == "tvf") { + *reader = std::make_unique(); + } else if (table_format == "hive") { + *reader = format::hive::HiveReader::create_unique(); + } else if (table_format == "iceberg") { + if (get_range_format_type(*_params, range) == TFileFormatType::FORMAT_JNI) { + *reader = std::make_unique(); + } else { + *reader = std::make_unique(); + } + } else if (table_format == "paimon") { + *reader = std::make_unique(); + } else if (table_format == "hudi") { + *reader = std::make_unique(); + } else if (table_format == "jdbc") { + *reader = std::make_unique(); + } else if (table_format == "max_compute") { + const auto* mc_desc = + static_cast(_output_tuple_desc->table_desc()); + RETURN_IF_ERROR(mc_desc->init_status()); + *reader = std::make_unique(mc_desc); + } else if (table_format == "trino_connector") { + *reader = std::make_unique(); + } else if (table_format == "remote_doris") { + *reader = std::make_unique(); + } else { + return Status::NotSupported("FileScannerV2 does not support table format {}", table_format); + } + return Status::OK(); +} + +Status FileScannerV2::_prepare_table_reader_split(const TFileRangeDesc& range) { + std::map partition_values; + RETURN_IF_ERROR(_generate_partition_values(range, &partition_values)); + format::FileFormat current_split_format; + RETURN_IF_ERROR(_to_file_format(get_range_format_type(*_params, range), ¤t_split_format)); + RETURN_IF_ERROR(_table_reader->prepare_split({ + .partition_values = std::move(partition_values), + .cache = _kv_cache, + .current_range = range, + .current_split_format = current_split_format, + .global_rowid_context = _create_global_rowid_context(range), + })); + return Status::OK(); +} + +bool FileScannerV2::_should_enable_file_meta_cache() const { + return ExecEnv::GetInstance()->file_meta_cache()->enabled() && + _split_source->num_scan_ranges() < config::max_external_file_meta_cache_num / 3; +} + +std::optional FileScannerV2::_create_global_rowid_context( + const TFileRangeDesc& range) const { + if (!_need_global_rowid_column) { + return std::nullopt; + } + auto& id_file_map = _state->get_id_file_map(); + DORIS_CHECK(id_file_map != nullptr); + const auto file_id = id_file_map->get_file_mapping_id( + std::make_shared(_local_state->cast().parent_id(), + range, _should_enable_file_meta_cache())); + return format::GlobalRowIdContext { + .version = IdManager::ID_VERSION, + .backend_id = BackendOptions::get_backend_id(), + .file_id = file_id, + }; +} + +Status FileScannerV2::_generate_partition_values( + const TFileRangeDesc& range, std::map* partition_values) const { + DORIS_CHECK(partition_values != nullptr); + partition_values->clear(); + if (!range.__isset.columns_from_path_keys || !range.__isset.columns_from_path) { + return Status::OK(); + } + DORIS_CHECK(range.columns_from_path_keys.size() == range.columns_from_path.size()); + for (size_t idx = 0; idx < range.columns_from_path_keys.size(); ++idx) { + const auto& key = range.columns_from_path_keys[idx]; + const auto it = _partition_slot_descs.find(key); + if (it == _partition_slot_descs.end()) { + continue; + } + const auto& value = range.columns_from_path[idx]; + const bool is_null = range.__isset.columns_from_path_is_null && + idx < range.columns_from_path_is_null.size() && + range.columns_from_path_is_null[idx]; + Field field; + DORIS_CHECK(it->second.slot_desc != nullptr); + RETURN_IF_ERROR(_parse_partition_value(it->second.slot_desc, value, is_null, &field)); + partition_values->emplace(it->second.canonical_name, std::move(field)); + } + return Status::OK(); +} + +Status FileScannerV2::_parse_partition_value(const SlotDescriptor* slot_desc, + const std::string& value, bool is_null, + Field* field) const { + DORIS_CHECK(slot_desc != nullptr); + DORIS_CHECK(field != nullptr); + if (is_null) { + *field = Field::create_field(Null()); + return Status::OK(); + } + const auto data_type = remove_nullable(slot_desc->get_data_type_ptr()); + auto column = data_type->create_column(); + auto serde = data_type->get_serde(); + DataTypeSerDe::FormatOptions options; + options.converted_from_string = true; + StringRef ref(value.data(), value.size()); + RETURN_IF_ERROR(serde->from_string(ref, *column, options)); + DORIS_CHECK(column->size() == 1); + *field = (*column)[0]; + return Status::OK(); +} + +Status FileScannerV2::_init_expr_ctxes() { + _slot_id_to_desc.clear(); + _slot_id_to_global_index.clear(); + _partition_slot_descs.clear(); + _file_slot_descs.clear(); + for (const auto* slot_desc : _output_tuple_desc->slots()) { + _slot_id_to_desc.emplace(slot_desc->id(), slot_desc); + } + DORIS_CHECK(_table_reader != nullptr); + RETURN_IF_ERROR(_build_projected_columns(*_table_reader)); + return Status::OK(); +} + +Status FileScannerV2::_build_projected_columns(const format::TableReader& table_reader) { + _projected_columns.clear(); + _projected_columns.reserve(_params->required_slots.size()); + _need_global_rowid_column = false; + format::ProjectedColumnBuildContext build_context { + .scan_params = _params, + .range = &_current_range, + .runtime_state = _state, + }; + + for (size_t slot_idx = 0; slot_idx < _params->required_slots.size(); ++slot_idx) { + const auto& slot_info = _params->required_slots[slot_idx]; + const auto it = _slot_id_to_desc.find(slot_info.slot_id); + if (it == _slot_id_to_desc.end()) { + return Status::InternalError("Unknown source slot descriptor, slot_id={}", + slot_info.slot_id); + } + auto column = _build_table_column(it->second); + if (column.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) { + _need_global_rowid_column = true; + } + RETURN_IF_ERROR(_build_default_expr(slot_info, &column.default_expr)); + build_context.schema_column.reset(); + RETURN_IF_ERROR(table_reader.annotate_projected_column(slot_info, &build_context, &column)); + // Build nested children from access paths generated by the slot's access-path + // expressions. A projected column can therefore contain only a subset of the schema + // column's nested children. + RETURN_IF_ERROR(AccessPathParser::build_nested_children( + &column, it->second, + build_context.schema_column.has_value() ? &*build_context.schema_column : nullptr)); + if (is_partition_slot(slot_info, column.name)) { + column.is_partition_key = true; + _partition_slot_descs.emplace( + column.name, + PartitionSlotInfo {.slot_desc = it->second, .canonical_name = column.name}); + for (const auto& alias : column.name_mapping) { + _partition_slot_descs.emplace( + alias, + PartitionSlotInfo {.slot_desc = it->second, .canonical_name = column.name}); + } + } else if (is_data_file_slot(slot_info, column.name)) { + _file_slot_descs.push_back(const_cast(it->second)); + } + const auto global_index = format::GlobalIndex(slot_idx); + _slot_id_to_global_index.emplace(slot_info.slot_id, global_index); + _projected_columns.push_back(std::move(column)); + } + RETURN_IF_ERROR(table_reader.validate_projected_columns(build_context)); + return Status::OK(); +} + +Status FileScannerV2::_build_default_expr(const TFileScanSlotInfo& slot_info, + VExprContextSPtr* ctx) const { + DORIS_CHECK(ctx != nullptr); + if (slot_info.__isset.default_value_expr && !slot_info.default_value_expr.nodes.empty()) { + return VExpr::create_expr_tree(slot_info.default_value_expr, *ctx); + } + + if (_params->__isset.default_value_of_src_slot) { + const auto it = _params->default_value_of_src_slot.find(slot_info.slot_id); + if (it != _params->default_value_of_src_slot.end() && !it->second.nodes.empty()) { + return VExpr::create_expr_tree(it->second, *ctx); + } + } + return Status::OK(); +} + +format::ColumnDefinition FileScannerV2::_build_table_column(const SlotDescriptor* slot_desc) { + DORIS_CHECK(slot_desc != nullptr); + format::ColumnDefinition column; + // TODO(gabriel): why always BY_NAME here? + column.identifier = Field::create_field(slot_desc->col_name()); + column.name = slot_desc->col_name(); + column.type = slot_desc->get_data_type_ptr(); + return column; +} + +Status FileScannerV2::_build_table_column_predicates( + format::TableColumnPredicates* predicates) const { + DORIS_CHECK(predicates != nullptr); + predicates->clear(); + const auto& slot_predicates = _local_state->cast()._slot_id_to_predicates; + for (const auto& [slot_id, slot_predicate_list] : slot_predicates) { + const auto it = _slot_id_to_desc.find(slot_id); + if (it == _slot_id_to_desc.end()) { + continue; + } + const auto global_index_it = _slot_id_to_global_index.find(slot_id); + if (global_index_it == _slot_id_to_global_index.end()) { + continue; + } + (*predicates)[global_index_it->second] = slot_predicate_list; + } + return Status::OK(); +} + +Status FileScannerV2::_build_table_conjuncts(VExprContextSPtrs* conjuncts) const { + DORIS_CHECK(conjuncts != nullptr); + conjuncts->clear(); + conjuncts->reserve(_conjuncts.size()); + for (const auto& conjunct : _conjuncts) { + VExprSPtr root; + RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root)); + RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&root, _slot_id_to_global_index)); + conjuncts->push_back(VExprContext::create_shared(std::move(root))); + } + return Status::OK(); +} + +TFileFormatType::type FileScannerV2::_get_current_format_type() const { + return get_range_format_type(*_params, _current_range); +} + +Status FileScannerV2::_to_file_format(TFileFormatType::type format_type, + format::FileFormat* file_format) { + DORIS_CHECK(file_format != nullptr); + switch (format_type) { + case TFileFormatType::FORMAT_PARQUET: + *file_format = format::FileFormat::PARQUET; + return Status::OK(); + case TFileFormatType::FORMAT_JNI: + *file_format = format::FileFormat::JNI; + return Status::OK(); + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_CSV_GZ: + case TFileFormatType::FORMAT_CSV_BZ2: + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + case TFileFormatType::FORMAT_CSV_LZ4BLOCK: + case TFileFormatType::FORMAT_CSV_LZOP: + case TFileFormatType::FORMAT_CSV_DEFLATE: + case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK: + case TFileFormatType::FORMAT_PROTO: + *file_format = format::FileFormat::CSV; + return Status::OK(); + case TFileFormatType::FORMAT_TEXT: + *file_format = format::FileFormat::TEXT; + return Status::OK(); + case TFileFormatType::FORMAT_JSON: + *file_format = format::FileFormat::JSON; + return Status::OK(); + case TFileFormatType::FORMAT_NATIVE: + *file_format = format::FileFormat::NATIVE; + return Status::OK(); + case TFileFormatType::FORMAT_ARROW: + *file_format = format::FileFormat::ARROW; + return Status::OK(); + default: + return Status::NotSupported("FileScannerV2 does not support file format {}", + to_string(format_type)); + } +} + +Status FileScannerV2::_init_io_ctx() { + _io_ctx = std::make_shared(); + _io_ctx->query_id = &_state->query_id(); + return Status::OK(); +} + +void FileScannerV2::_reset_adaptive_batch_size_state() { + _block_size_predictor.reset(); + COUNTER_SET(_adaptive_batch_predicted_rows_counter, int64_t(0)); + COUNTER_SET(_adaptive_batch_actual_bytes_counter, int64_t(0)); +} + +void FileScannerV2::_init_adaptive_batch_size_state(TFileFormatType::type format_type) { + _reset_adaptive_batch_size_state(); + if (!_should_enable_adaptive_batch_size(format_type)) { + return; + } + + // V2 native file readers do not have reliable row-width hints before the first batch. Start + // every split with a small probe, then learn bytes-per-row from the materialized table block + // and keep later batches close to RuntimeState::preferred_block_size_bytes(). + _block_size_predictor = std::make_unique( + _state->preferred_block_size_bytes(), 0.0, ADAPTIVE_BATCH_INITIAL_PROBE_ROWS, + _state->batch_size()); +} + +bool FileScannerV2::_should_enable_adaptive_batch_size(TFileFormatType::type format_type) const { + if (!config::enable_adaptive_batch_size) { + return false; + } + switch (format_type) { + case TFileFormatType::FORMAT_PARQUET: + case TFileFormatType::FORMAT_ORC: + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_CSV_GZ: + case TFileFormatType::FORMAT_CSV_BZ2: + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + case TFileFormatType::FORMAT_CSV_LZ4BLOCK: + case TFileFormatType::FORMAT_CSV_LZOP: + case TFileFormatType::FORMAT_CSV_DEFLATE: + case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK: + case TFileFormatType::FORMAT_PROTO: + case TFileFormatType::FORMAT_TEXT: + case TFileFormatType::FORMAT_JSON: + case TFileFormatType::FORMAT_JNI: + return true; + default: + return false; + } +} + +bool FileScannerV2::_should_run_adaptive_batch_size() const { + // COUNT pushdown emits synthetic rows from file metadata and does not materialize file columns, + // so there is no useful row-width sample to learn from. + return _block_size_predictor != nullptr && + _local_state->get_push_down_agg_type() != TPushAggOp::type::COUNT; +} + +size_t FileScannerV2::_predict_reader_batch_rows() { + DORIS_CHECK(_block_size_predictor != nullptr); + // Before history exists this returns the probe row count; after update(), it returns roughly + // preferred_block_size_bytes / EWMA(bytes_per_row), capped by RuntimeState::batch_size(). + const size_t predicted_rows = _block_size_predictor->predict_next_rows(); + COUNTER_SET(_adaptive_batch_predicted_rows_counter, static_cast(predicted_rows)); + return predicted_rows; +} + +void FileScannerV2::_update_adaptive_batch_size(const Block& block) { + if (!_should_run_adaptive_batch_size()) { + return; + } + COUNTER_SET(_adaptive_batch_actual_bytes_counter, static_cast(block.bytes())); + if (block.rows() == 0) { + return; + } + // The sample is taken after TableReader has finalized file-local columns to table columns. + // This matches the memory shape seen by upstream operators and catches very wide nested + // columns, such as map/string payloads, after the first probe batch. + if (!_block_size_predictor->has_history()) { + COUNTER_UPDATE(_adaptive_batch_probe_count_counter, 1); + } + _block_size_predictor->update(block); +} + +Status FileScannerV2::close(RuntimeState* state) { + if (!_try_close()) { + return Status::OK(); + } + if (_table_reader != nullptr) { + RETURN_IF_ERROR(_table_reader->close()); + _report_condition_cache_profile(); + _table_reader.reset(); + } + return Scanner::close(state); +} + +void FileScannerV2::try_stop() { + Scanner::try_stop(); + if (_io_ctx) { + _io_ctx->should_stop = true; + } +} + +void FileScannerV2::update_realtime_counters() { + if (_file_reader_stats == nullptr) { + return; + } + const int64_t bytes_read = _file_reader_stats->read_bytes; + COUNTER_SET(_file_read_bytes_counter, bytes_read); + COUNTER_SET(_file_read_calls_counter, cast_set(_file_reader_stats->read_calls)); + COUNTER_SET(_file_read_time_counter, cast_set(_file_reader_stats->read_time_ns)); +} + +void FileScannerV2::_collect_profile_before_close() { + _report_file_reader_predicate_filtered_rows(); + Scanner::_collect_profile_before_close(); + if (_file_reader_stats != nullptr) { + COUNTER_SET(_file_read_bytes_counter, cast_set(_file_reader_stats->read_bytes)); + COUNTER_SET(_file_read_calls_counter, cast_set(_file_reader_stats->read_calls)); + COUNTER_SET(_file_read_time_counter, cast_set(_file_reader_stats->read_time_ns)); + } + // Query profiles can be collected before Scanner::close() runs. Publish condition-cache + // counters here as well, using deltas so this method and close() cannot double count. + _report_condition_cache_profile(); +} + +bool FileScannerV2::_should_update_load_counters() const { + if (_is_load) { + return true; + } + // TVF based loads (e.g. http_stream, group commit relay) plan the load source as a + // tvf query scan without src tuple desc, so _is_load is false. But rows filtered by + // the load's WHERE clause still need to be reported as unselected rows. FILE_STREAM + // is only reachable from such load entries, never from normal queries, so use it to + // identify these scanners. + return (_params != nullptr && _params->__isset.file_type && + _params->file_type == TFileType::FILE_STREAM) || + (_current_range.__isset.file_type && _current_range.file_type == TFileType::FILE_STREAM); +} + +void FileScannerV2::_report_file_reader_predicate_filtered_rows() { + const int64_t filtered_rows = _io_ctx != nullptr ? _io_ctx->predicate_filtered_rows : 0; + const int64_t filtered_delta = filtered_rows - _reported_predicate_filtered_rows; + if (filtered_delta > 0) { + // File readers can evaluate localized conjuncts before a block reaches Scanner. Count + // those rows as scanner-level unselected rows so load statistics stay identical no matter + // whether a predicate is pushed down or evaluated by Scanner::_filter_output_block(). + _counter.num_rows_unselected += filtered_delta; + _reported_predicate_filtered_rows = filtered_rows; + } +} + +void FileScannerV2::_report_condition_cache_profile() { + auto* local_state = static_cast(_local_state); + const int64_t hit_count = + _table_reader != nullptr ? _table_reader->condition_cache_hit_count() : 0; + const int64_t hit_delta = hit_count - _reported_condition_cache_hit_count; + if (hit_delta > 0) { + COUNTER_UPDATE(local_state->_condition_cache_hit_counter, hit_delta); + _reported_condition_cache_hit_count = hit_count; + } + const int64_t filtered_rows = _io_ctx != nullptr ? _io_ctx->condition_cache_filtered_rows : 0; + const int64_t filtered_delta = filtered_rows - _reported_condition_cache_filtered_rows; + if (filtered_delta > 0) { + COUNTER_UPDATE(local_state->_condition_cache_filtered_rows_counter, filtered_delta); + _reported_condition_cache_filtered_rows = filtered_rows; + } +} + +} // namespace doris diff --git a/be/src/exec/scan/file_scanner_v2.h b/be/src/exec/scan/file_scanner_v2.h new file mode 100644 index 00000000000000..bc493bfbd85c59 --- /dev/null +++ b/be/src/exec/scan/file_scanner_v2.h @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/factory_creator.h" +#include "common/status.h" +#include "core/block/block.h" +#include "exec/operator/file_scan_operator.h" +#include "exec/scan/scanner.h" +#include "exec/scan/split_source_connector.h" +#include "exprs/vexpr_fwd.h" +#include "format_v2/column_mapper.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/Descriptors_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "io/io_common.h" +#include "runtime/runtime_profile.h" +#include "storage/segment/adaptive_block_size_predictor.h" + +namespace doris { + +class RuntimeState; +class SlotDescriptor; +class TFileRangeDesc; +class TFileScanRangeParams; +class ShardedKVCache; + +class FileScannerV2 final : public Scanner { + ENABLE_FACTORY_CREATOR(FileScannerV2); + +public: + static constexpr const char* NAME = "FileScannerV2"; + static constexpr size_t ADAPTIVE_BATCH_INITIAL_PROBE_ROWS = 32; + + static bool is_supported(const TFileScanRangeParams& params, const TFileRangeDesc& range); +#ifdef BE_TEST + static Status TEST_to_file_format(TFileFormatType::type format_type, + format::FileFormat* file_format); + static bool TEST_is_partition_slot(const TFileScanSlotInfo& slot_info, + const std::string& column_name); + static bool TEST_is_data_file_slot(const TFileScanSlotInfo& slot_info, + const std::string& column_name); + static Status TEST_rewrite_slot_refs_to_global_index( + VExprSPtr* expr, + const std::unordered_map& slot_id_to_global_index); +#endif + + FileScannerV2(RuntimeState* state, FileScanLocalState* parent, int64_t limit, + std::shared_ptr split_source, RuntimeProfile* profile, + ShardedKVCache* kv_cache, + const std::unordered_map* colname_to_slot_id); + + Status init(RuntimeState* state, const VExprContextSPtrs& conjuncts) override; + Status _open_impl(RuntimeState* state) override; + Status close(RuntimeState* state) override; + void try_stop() override; + std::string get_name() override { return FileScannerV2::NAME; } + std::string get_current_scan_range_name() override { return _current_range_path; } + void update_realtime_counters() override; + +protected: + Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override; + void _collect_profile_before_close() override; + bool _should_update_load_counters() const override; + +private: + TFileFormatType::type _get_current_format_type() const; + Status _init_io_ctx(); + Status _init_expr_ctxes(); + Status _prepare_next_split(bool* eos); + Status _init_table_reader(const TFileRangeDesc& range); + Status _create_table_reader_for_format(const TFileRangeDesc& range, + std::unique_ptr* reader) const; + Status _prepare_table_reader_split(const TFileRangeDesc& range); + bool _should_enable_file_meta_cache() const; + std::optional _create_global_rowid_context( + const TFileRangeDesc& range) const; + Status _generate_partition_values(const TFileRangeDesc& range, + std::map* partition_values) const; + Status _parse_partition_value(const SlotDescriptor* slot_desc, const std::string& value, + bool is_null, Field* field) const; + Status _build_projected_columns(const format::TableReader& table_reader); + Status _build_default_expr(const TFileScanSlotInfo& slot_info, VExprContextSPtr* ctx) const; + static format::ColumnDefinition _build_table_column(const SlotDescriptor* slot_desc); + Status _build_table_column_predicates(format::TableColumnPredicates* predicates) const; + Status _build_table_conjuncts(VExprContextSPtrs* conjuncts) const; + static Status _to_file_format(TFileFormatType::type format_type, + format::FileFormat* file_format); + void _reset_adaptive_batch_size_state(); + void _init_adaptive_batch_size_state(TFileFormatType::type format_type); + bool _should_enable_adaptive_batch_size(TFileFormatType::type format_type) const; + bool _should_run_adaptive_batch_size() const; + size_t _predict_reader_batch_rows(); + void _update_adaptive_batch_size(const Block& block); + void _report_file_reader_predicate_filtered_rows(); + void _report_condition_cache_profile(); + + struct PartitionSlotInfo { + const SlotDescriptor* slot_desc = nullptr; + std::string canonical_name; + }; + + const TFileScanRangeParams* _params = nullptr; + std::shared_ptr _split_source; + bool _first_scan_range = false; + bool _has_prepared_split = false; + TFileRangeDesc _current_range; + std::string _current_range_path; + + std::unique_ptr _table_reader; + std::vector _projected_columns; + // File formats without embedded schema, such as CSV, still need the FE slot descriptors in + // file-column order. This mirrors old FileScanner::_file_slot_descs and is passed only to + // readers that cannot derive their schema from file metadata. + std::vector _file_slot_descs; + bool _need_global_rowid_column = false; + std::unordered_map _slot_id_to_desc; + std::unordered_map _slot_id_to_global_index; + std::unordered_map _partition_slot_descs; + + std::unique_ptr _file_cache_statistics; + std::unique_ptr _file_reader_stats; + std::shared_ptr _io_ctx; + ShardedKVCache* _kv_cache = nullptr; + + RuntimeProfile::Counter* _get_block_timer = nullptr; + RuntimeProfile::Counter* _file_counter = nullptr; + RuntimeProfile::Counter* _file_read_bytes_counter = nullptr; + RuntimeProfile::Counter* _file_read_calls_counter = nullptr; + RuntimeProfile::Counter* _file_read_time_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_predicted_rows_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_actual_bytes_counter = nullptr; + RuntimeProfile::Counter* _adaptive_batch_probe_count_counter = nullptr; + std::unique_ptr _block_size_predictor; + int64_t _reported_predicate_filtered_rows = 0; + int64_t _reported_condition_cache_hit_count = 0; + int64_t _reported_condition_cache_filtered_rows = 0; +}; + +} // namespace doris diff --git a/be/src/exec/scan/split_source_connector.h b/be/src/exec/scan/split_source_connector.h index 5926baff303cbf..320f6f90d0dd02 100644 --- a/be/src/exec/scan/split_source_connector.h +++ b/be/src/exec/scan/split_source_connector.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "common/config.h" #include "core/custom_allocator.h" #include "runtime/runtime_state.h" @@ -45,6 +47,15 @@ class SplitSourceConnector { virtual TFileScanRangeParams* get_params() = 0; + virtual bool all_scan_ranges_match( + const TFileScanRangeParams& params, + const std::function& + predicate) { + (void)params; + (void)predicate; + return false; + } + protected: template , typename V2 = std::vector> requires(std::is_same_v, @@ -125,6 +136,24 @@ class LocalSplitSourceConnector : public SplitSourceConnector { throw Exception( Status::FatalError("Unreachable, params is got by file_scan_range_params_map")); } + + bool all_scan_ranges_match( + const TFileScanRangeParams& params, + const std::function& + predicate) override { + if (_scan_ranges.empty()) { + return false; + } + for (const auto& scan_range : _scan_ranges) { + const auto& file_scan_range = scan_range.scan_range.ext_scan_range.file_scan_range; + for (const auto& range : file_scan_range.ranges) { + if (!predicate(params, range)) { + return false; + } + } + } + return true; + } }; /** diff --git a/be/src/exec/sink/writer/vhive_partition_writer.cpp b/be/src/exec/sink/writer/vhive_partition_writer.cpp index 5e2582ceb5f8fc..8331efac54bd47 100644 --- a/be/src/exec/sink/writer/vhive_partition_writer.cpp +++ b/be/src/exec/sink/writer/vhive_partition_writer.cpp @@ -93,6 +93,8 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* operator_ to_string(_hive_compress_type)); } } + // TODO: INT96 is kept for Hive 2/3 compatibility. Add an explicit option before + // changing the default Hive parquet timestamp encoding to standard logical types. ParquetFileOptions parquet_options = {parquet_compression_type, TParquetVersion::PARQUET_1_0, false, true}; _file_format_transformer = std::make_unique( diff --git a/be/src/exprs/runtime_filter_expr.cpp b/be/src/exprs/runtime_filter_expr.cpp index f0c1f67c12a6c3..584707fddc6a9a 100644 --- a/be/src/exprs/runtime_filter_expr.cpp +++ b/be/src/exprs/runtime_filter_expr.cpp @@ -68,6 +68,17 @@ RuntimeFilterExpr::RuntimeFilterExpr(const TExprNode& node, VExprSPtr impl, doub DORIS_CHECK(_impl != nullptr); } +Status RuntimeFilterExpr::clone_node(VExprSPtr* cloned_expr) const { + DORIS_CHECK(cloned_expr != nullptr); + DORIS_CHECK(_impl != nullptr); + VExprSPtr cloned_impl; + RETURN_IF_ERROR(_impl->deep_clone(&cloned_impl)); + *cloned_expr = RuntimeFilterExpr::create_shared(clone_texpr_node(), std::move(cloned_impl), + _ignore_thredhold, _null_aware, _filter_id, + _sampling_frequency); + return Status::OK(); +} + Status RuntimeFilterExpr::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { RETURN_IF_ERROR_OR_PREPARED(_impl->prepare(state, desc, context)); @@ -92,7 +103,7 @@ void RuntimeFilterExpr::close(VExprContext* context, FunctionContext::FunctionSt Status RuntimeFilterExpr::execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, size_t count, ColumnPtr& result_column) const { - return Status::InternalError("Not implement RuntimeFilterExpr::execute_column_impl"); + return _impl->execute_column(context, block, selector, count, result_column); } const std::string& RuntimeFilterExpr::expr_name() const { diff --git a/be/src/exprs/runtime_filter_expr.h b/be/src/exprs/runtime_filter_expr.h index efbe55878f24b9..7994d2a71ae14f 100644 --- a/be/src/exprs/runtime_filter_expr.h +++ b/be/src/exprs/runtime_filter_expr.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "common/config.h" #include "common/status.h" @@ -81,6 +82,8 @@ class RuntimeFilterExpr final : public VExpr { } VExprSPtr get_impl() const override { return _impl; } + void set_impl(VExprSPtr impl) { _impl = std::move(impl); } + Status clone_node(VExprSPtr* cloned_expr) const override; void attach_profile_counter(std::shared_ptr rf_input_rows, std::shared_ptr rf_filter_rows, @@ -117,6 +120,9 @@ class RuntimeFilterExpr final : public VExpr { std::shared_ptr predicate_always_true_rows_counter() const { return _always_true_filter_rows; } + bool is_slot_ref() const override { return false; } + bool is_virtual_slot_ref() const override { return false; } + bool is_column_ref() const override { return false; } private: VExprSPtr _impl; diff --git a/be/src/exprs/short_circuit_evaluation_expr.h b/be/src/exprs/short_circuit_evaluation_expr.h index 47a37b360c6e90..7240207aacad71 100644 --- a/be/src/exprs/short_circuit_evaluation_expr.h +++ b/be/src/exprs/short_circuit_evaluation_expr.h @@ -63,6 +63,13 @@ class ShortCircuitIfExpr final : public ShortCircuitExpr { ~ShortCircuitIfExpr() override = default; const std::string& expr_name() const override { return IF_NAME; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + node.__set_short_circuit_evaluation(true); + *cloned_expr = ShortCircuitIfExpr::create_shared(node); + return Status::OK(); + } Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, size_t count, ColumnPtr& result_column) const override; @@ -76,6 +83,18 @@ class ShortCircuitCaseExpr final : public ShortCircuitExpr { ShortCircuitCaseExpr(const TExprNode& node); ~ShortCircuitCaseExpr() override = default; const std::string& expr_name() const override { return CASE_NAME; } + bool has_else_expr() const { return _has_else_expr; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + TCaseExpr case_node; + case_node.__set_has_case_expr(false); + case_node.__set_has_else_expr(_has_else_expr); + node.__set_case_expr(case_node); + node.__set_short_circuit_evaluation(true); + *cloned_expr = ShortCircuitCaseExpr::create_shared(node); + return Status::OK(); + } Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, size_t count, ColumnPtr& result_column) const override; @@ -91,6 +110,13 @@ class ShortCircuitIfNullExpr final : public ShortCircuitExpr { ~ShortCircuitIfNullExpr() override = default; const std::string& expr_name() const override { return IFNULL_NAME; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + node.__set_short_circuit_evaluation(true); + *cloned_expr = ShortCircuitIfNullExpr::create_shared(node); + return Status::OK(); + } Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, size_t count, ColumnPtr& result_column) const override; @@ -104,10 +130,17 @@ class ShortCircuitCoalesceExpr final : public ShortCircuitExpr { ShortCircuitCoalesceExpr(const TExprNode& node) : ShortCircuitExpr(node) {} ~ShortCircuitCoalesceExpr() override = default; const std::string& expr_name() const override { return COALESCE_NAME; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + node.__set_short_circuit_evaluation(true); + *cloned_expr = ShortCircuitCoalesceExpr::create_shared(node); + return Status::OK(); + } Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, size_t count, ColumnPtr& result_column) const override; private: inline static const std::string COALESCE_NAME = "coalesce"; }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/vbloom_predicate.h b/be/src/exprs/vbloom_predicate.h index f23bde0d9ad3fd..410bb5c8d370b3 100644 --- a/be/src/exprs/vbloom_predicate.h +++ b/be/src/exprs/vbloom_predicate.h @@ -59,6 +59,13 @@ class VBloomPredicate final : public VExpr { std::shared_ptr get_bloom_filter_func() const override { return _filter; } uint64_t get_digest(uint64_t seed) const override; + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto cloned = VBloomPredicate::create_shared(clone_texpr_node()); + cloned->set_filter(_filter); + *cloned_expr = std::move(cloned); + return Status::OK(); + } private: Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter, diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h index 97b2551091d100..6787283f0c5d23 100644 --- a/be/src/exprs/vcase_expr.h +++ b/be/src/exprs/vcase_expr.h @@ -59,6 +59,17 @@ class VCaseExpr final : public VExpr { void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override; const std::string& expr_name() const override; std::string debug_string() const override; + bool has_else_expr() const { return _has_else_expr; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + TCaseExpr case_node; + case_node.__set_has_case_expr(false); + case_node.__set_has_else_expr(_has_else_expr); + node.__set_case_expr(case_node); + *cloned_expr = VCaseExpr::create_shared(node); + return Status::OK(); + } private: template diff --git a/be/src/exprs/vcast_expr.h b/be/src/exprs/vcast_expr.h index c3f2526794b3b8..f0f3ead95d56af 100644 --- a/be/src/exprs/vcast_expr.h +++ b/be/src/exprs/vcast_expr.h @@ -57,6 +57,11 @@ class VCastExpr : public VExpr { const DataTypePtr& get_target_type() const; virtual std::string cast_name() const { return "CAST"; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VCastExpr::create_shared(clone_texpr_node()); + return Status::OK(); + } uint64_t get_digest(uint64_t seed) const override { auto res = VExpr::get_digest(seed); @@ -94,6 +99,13 @@ class TryCastExpr final : public VCastExpr { size_t count, ColumnPtr& result_column) const override; ~TryCastExpr() override = default; std::string cast_name() const override { return "TRY CAST"; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + node.__set_is_cast_nullable(_original_cast_return_is_nullable); + *cloned_expr = TryCastExpr::create_shared(node); + return Status::OK(); + } private: DataTypePtr original_cast_return_type() const; diff --git a/be/src/exprs/vcolumn_ref.h b/be/src/exprs/vcolumn_ref.h index e4485e5815e02f..33ade77defaaba 100644 --- a/be/src/exprs/vcolumn_ref.h +++ b/be/src/exprs/vcolumn_ref.h @@ -81,6 +81,19 @@ class VColumnRef final : public VExpr { } } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + TColumnRef column_ref; + column_ref.__set_column_id(_column_id); + column_ref.__set_column_name(_column_name); + node.__set_column_ref(column_ref); + auto cloned = VColumnRef::create_shared(node); + cloned->set_gap(_gap.load()); + *cloned_expr = std::move(cloned); + return Status::OK(); + } + std::string debug_string() const override { std::stringstream out; out << "VColumnRef(slot_id: " << _column_id << ",column_name: " << _column_name diff --git a/be/src/exprs/vcompound_pred.h b/be/src/exprs/vcompound_pred.h index 9772efb90bc270..b703e30ea6c16f 100644 --- a/be/src/exprs/vcompound_pred.h +++ b/be/src/exprs/vcompound_pred.h @@ -60,6 +60,11 @@ class VCompoundPred : public VectorizedFnCall { #endif const std::string& expr_name() const override { return _expr_name; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VCompoundPred::create_shared(clone_texpr_node()); + return Status::OK(); + } bool can_evaluate_zonemap_filter() const override { switch (_op) { diff --git a/be/src/exprs/vcondition_expr.h b/be/src/exprs/vcondition_expr.h index ceb6ed1396d78f..6beade74b3e3d6 100644 --- a/be/src/exprs/vcondition_expr.h +++ b/be/src/exprs/vcondition_expr.h @@ -65,6 +65,11 @@ class VectorizedIfExpr : public VConditionExpr { size_t count, ColumnPtr& result_column) const override; const std::string& expr_name() const override { return IF_NAME; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VectorizedIfExpr::create_shared(clone_texpr_node()); + return Status::OK(); + } inline static const std::string IF_NAME = "if"; protected: @@ -123,6 +128,11 @@ class VectorizedIfNullExpr : public VectorizedIfExpr { public: VectorizedIfNullExpr(const TExprNode& node) : VectorizedIfExpr(node) {} const std::string& expr_name() const override { return IF_NULL_NAME; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VectorizedIfNullExpr::create_shared(clone_texpr_node()); + return Status::OK(); + } inline static const std::string IF_NULL_NAME = "ifnull"; Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, @@ -137,6 +147,11 @@ class VectorizedCoalesceExpr : public VConditionExpr { size_t count, ColumnPtr& result_column) const override; VectorizedCoalesceExpr(const TExprNode& node) : VConditionExpr(node) {} const std::string& expr_name() const override { return NAME; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VectorizedCoalesceExpr::create_shared(clone_texpr_node()); + return Status::OK(); + } inline static const std::string NAME = "coalesce"; }; diff --git a/be/src/exprs/vdirect_in_predicate.h b/be/src/exprs/vdirect_in_predicate.h index 21b729f140cc5c..2fd1e9a35febc7 100644 --- a/be/src/exprs/vdirect_in_predicate.h +++ b/be/src/exprs/vdirect_in_predicate.h @@ -46,7 +46,7 @@ class VDirectInPredicate final : public VExpr { // materialization and slot-IN rewrite that would otherwise rebuild child-typed literals from // dictionary codes. VDirectInPredicate(const TExprNode& node, const std::shared_ptr& filter, - bool hybrid_set_values_match_child_type) + bool hybrid_set_values_match_child_type = true) : VExpr(node), _filter(filter), _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type), @@ -98,6 +98,13 @@ class VDirectInPredicate final : public VExpr { std::dynamic_pointer_cast(get_child(0)) != nullptr; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VDirectInPredicate::create_shared(clone_texpr_node(), _filter, + _hybrid_set_values_match_child_type); + return Status::OK(); + } + bool get_slot_in_expr(VExprSPtr& new_root) const { if (!_hybrid_set_values_match_child_type) { return false; diff --git a/be/src/exprs/vectorized_fn_call.cpp b/be/src/exprs/vectorized_fn_call.cpp index 386edffb5e7e5e..8cc189f8b9939e 100644 --- a/be/src/exprs/vectorized_fn_call.cpp +++ b/be/src/exprs/vectorized_fn_call.cpp @@ -81,7 +81,9 @@ const static std::set DISTANCE_FUNCS = {L2DistanceApproximate::name const static std::set OPS_FOR_ANN_RANGE_SEARCH = { TExprOpcode::GE, TExprOpcode::LE, TExprOpcode::LE, TExprOpcode::GT, TExprOpcode::LT}; -VectorizedFnCall::VectorizedFnCall(const TExprNode& node) : VExpr(node) {} +VectorizedFnCall::VectorizedFnCall(const TExprNode& node) : VExpr(node) { + _function_name = _fn.name.function_name; +} Status VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { diff --git a/be/src/exprs/vectorized_fn_call.h b/be/src/exprs/vectorized_fn_call.h index c6e24c5377e48a..7bbb4303d41c47 100644 --- a/be/src/exprs/vectorized_fn_call.h +++ b/be/src/exprs/vectorized_fn_call.h @@ -101,6 +101,12 @@ class VectorizedFnCall : public VExpr { segment_v2::AnnRangeSearchRuntime& runtime, bool& suitable_for_ann_index) override; + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(*this); + return Status::OK(); + } + protected: FunctionBasePtr _function; std::string _expr_name; diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp index d88b18d5f022a5..c01d299f411a08 100644 --- a/be/src/exprs/vexpr.cpp +++ b/be/src/exprs/vexpr.cpp @@ -378,6 +378,51 @@ VExpr::VExpr(DataTypePtr type, bool is_slotref) } } +TExprNode VExpr::clone_texpr_node() const { + TExprNode node; + node.__set_node_type(_node_type); + node.__set_opcode(_opcode); + node.__set_type(create_type_desc(remove_nullable(_data_type)->get_primitive_type(), + static_cast(_data_type->get_precision()), + static_cast(_data_type->get_scale()))); + node.__set_is_nullable(_data_type->is_nullable()); + node.__set_num_children(get_num_children()); + node.__set_fn(_fn); + return node; +} + +Status VExpr::clone_node(VExprSPtr* cloned_expr) const { + DORIS_CHECK(cloned_expr != nullptr); + return Status::NotSupported("Cannot clone expression {} for file-local rewrite", expr_name()); +} + +Status VExpr::deep_clone(VExprSPtr* cloned_expr, + const VExprCloneNodeOverride& clone_node_override) const { + DORIS_CHECK(cloned_expr != nullptr); + + VExprSPtr cloned; + if (clone_node_override) { + RETURN_IF_ERROR(clone_node_override(*this, &cloned)); + } + if (cloned == nullptr) { + RETURN_IF_ERROR(clone_node(&cloned)); + } + DORIS_CHECK(cloned != nullptr); + + VExprSPtrs cloned_children; + cloned_children.reserve(_children.size()); + for (const auto& child : _children) { + DORIS_CHECK(child != nullptr); + VExprSPtr cloned_child; + RETURN_IF_ERROR(child->deep_clone(&cloned_child, clone_node_override)); + cloned_children.push_back(std::move(cloned_child)); + } + cloned->set_children(std::move(cloned_children)); + cloned->reset_prepare_state(); + *cloned_expr = std::move(cloned); + return Status::OK(); +} + Status VExpr::prepare(RuntimeState* state, const RowDescriptor& row_desc, VExprContext* context) { ++context->_depth_num; if (context->_depth_num > config::max_depth_of_expr_tree) { @@ -407,6 +452,15 @@ Status VExpr::open(RuntimeState* state, VExprContext* context, return Status::OK(); } +void VExpr::reset_prepare_state() { + _prepared = false; + _prepare_finished = false; + _open_finished = false; + for (auto& child : _children) { + child->reset_prepare_state(); + } +} + void VExpr::close(VExprContext* context, FunctionContext::FunctionStateScope scope) { for (auto& i : _children) { i->close(context, scope); @@ -757,8 +811,9 @@ Status VExpr::get_const_col(VExprContext* context, return Status::OK(); } - if (_constant_col != nullptr) { - DCHECK(column_wrapper != nullptr); + if (_constant_col != nullptr && column_wrapper == nullptr) { + return Status::OK(); + } else if (_constant_col != nullptr) { *column_wrapper = _constant_col; return Status::OK(); } diff --git a/be/src/exprs/vexpr.h b/be/src/exprs/vexpr.h index 3b48fd3d2c60d7..5da5206d1b2aa7 100644 --- a/be/src/exprs/vexpr.h +++ b/be/src/exprs/vexpr.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -81,6 +82,7 @@ struct AnnRangeSearchRuntime; // the relatioinship between threads and classes. using Selector = IColumn::Selector; +using VExprCloneNodeOverride = std::function; struct AnnRangeSearchEvaluationResult { // Indicates whether the expr row_bitmap has been updated. @@ -215,11 +217,13 @@ class VExpr { const DataTypePtr& data_type() const { return _data_type; } - bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; } + virtual bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; } - bool is_virtual_slot_ref() const { return _node_type == TExprNodeType::VIRTUAL_SLOT_REF; } + virtual bool is_virtual_slot_ref() const { + return _node_type == TExprNodeType::VIRTUAL_SLOT_REF; + } - bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; } + virtual bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; } virtual bool is_literal() const { return false; } @@ -253,6 +257,10 @@ class VExpr { static bool contains_blockable_function(const VExprContextSPtrs& ctxs); + Status deep_clone(VExprSPtr* cloned_expr, + const VExprCloneNodeOverride& clone_node_override = {}) const; + virtual Status clone_node(VExprSPtr* cloned_expr) const; + bool is_nullable() const { return _data_type->is_nullable(); } PrimitiveType result_type() const { return _data_type->get_primitive_type(); } @@ -267,6 +275,7 @@ class VExpr { virtual const VExprSPtrs& children() const { return _children; } void set_children(const VExprSPtrs& children) { _children = children; } void set_children(VExprSPtrs&& children) { _children = std::move(children); } + void reset_prepare_state(); virtual std::string debug_string() const; static std::string debug_string(const VExprSPtrs& exprs); static std::string debug_string(const VExprContextSPtrs& ctxs); @@ -274,7 +283,7 @@ class VExpr { static ColumnPtr filter_column_with_selector(const ColumnPtr& origin_column, const Selector* selector, size_t count) { if (selector == nullptr) { - DCHECK_EQ(origin_column->size(), count); + DCHECK_EQ(origin_column->size(), count) << origin_column->get_name(); return origin_column; } DCHECK_EQ(count, selector->size()); @@ -368,6 +377,8 @@ class VExpr { virtual uint64_t get_digest(uint64_t seed) const; protected: + TExprNode clone_texpr_node() const; + /// Simple debug string that provides no expr subclass-specific information std::string debug_string(const std::string& expr_name) const { std::stringstream out; diff --git a/be/src/exprs/vin_predicate.h b/be/src/exprs/vin_predicate.h index 6cf3858768264c..6830f1bc4c2c20 100644 --- a/be/src/exprs/vin_predicate.h +++ b/be/src/exprs/vin_predicate.h @@ -64,6 +64,15 @@ class VInPredicate MOCK_REMOVE(final) : public VExpr { bool can_evaluate_zonemap_filter() const override; uint64_t get_digest(uint64_t seed) const override { return 0; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + auto node = clone_texpr_node(); + TInPredicate in_predicate; + in_predicate.__set_is_not_in(_is_not_in); + node.__set_in_predicate(in_predicate); + *cloned_expr = VInPredicate::create_shared(node); + return Status::OK(); + } private: Status _materialize_for_zonemap_filter(VExprContext* context); diff --git a/be/src/exprs/vliteral.cpp b/be/src/exprs/vliteral.cpp index 551839f699e2e6..9b93d7097274ee 100644 --- a/be/src/exprs/vliteral.cpp +++ b/be/src/exprs/vliteral.cpp @@ -37,12 +37,6 @@ namespace doris { class VExprContext; -void VLiteral::init(const TExprNode& node) { - Field field; - field = _data_type->get_field(node); - _column_ptr = _data_type->create_column_const(1, field); -} - Status VLiteral::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); return Status::OK(); diff --git a/be/src/exprs/vliteral.h b/be/src/exprs/vliteral.h index b1b8e89157d420..89988e2ba31142 100644 --- a/be/src/exprs/vliteral.h +++ b/be/src/exprs/vliteral.h @@ -24,6 +24,7 @@ #include "common/status.h" #include "core/data_type/data_type.h" #include "core/data_type_serde/data_type_serde.h" +#include "core/field.h" #include "exprs/vexpr.h" namespace doris { @@ -39,10 +40,19 @@ class VLiteral : public VExpr { VLiteral(const TExprNode& node, bool should_init = true) : VExpr(node), _expr_name(_data_type->get_name()) { if (should_init) { - init(node); + Field field; + field = _data_type->get_field(node); + _column_ptr = _data_type->create_column_const(1, field); } } + VLiteral(const DataTypePtr& type, const Field& field) : VExpr(type, false) { + _data_type = type; + _column_ptr = _data_type->create_column_const(1, field); + _node_type = TExprNodeType::LITERAL; + _expr_name = _data_type->get_name(); + } + #ifdef BE_TEST VLiteral() = default; MOCK_FUNCTION std::string value() const; @@ -67,13 +77,18 @@ class VLiteral : public VExpr { bool equals(const VExpr& other) override; uint64_t get_digest(uint64_t seed) const override; + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + Field field; + _column_ptr->get(0, field); + *cloned_expr = VLiteral::create_shared(_data_type, field); + return Status::OK(); + } protected: + VLiteral(const DataTypePtr& type) : VExpr(type, false) {} ColumnPtr _column_ptr; std::string _expr_name; - -private: - void init(const TExprNode& node); }; } // namespace doris diff --git a/be/src/exprs/vslot_ref.cpp b/be/src/exprs/vslot_ref.cpp index 87aad6b977ecbe..f02ef50d5751c3 100644 --- a/be/src/exprs/vslot_ref.cpp +++ b/be/src/exprs/vslot_ref.cpp @@ -41,10 +41,28 @@ VSlotRef::VSlotRef(const doris::TExprNode& node) VSlotRef::VSlotRef(const SlotDescriptor* desc) : VExpr(desc->type(), true), _slot_id(desc->id()), _column_id(-1), _column_name(nullptr) {} +VSlotRef::VSlotRef(int slot_id, int column_id, int column_uniq_id, const DataTypePtr& type, + std::string column_name) + : VExpr(type, true), + _slot_id(slot_id), + _column_id(column_id), + _column_uniq_id(column_uniq_id), + _owned_column_name(std::move(column_name)), + _column_name(&_owned_column_name) {} + Status VSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, VExprContext* context) { - RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); DCHECK_EQ(_children.size(), 0); + if (_prepared) { + return Status::OK(); + } + if (_column_id >= 0 && _column_name != nullptr) { + _prepared = true; + _prepare_finished = true; + return Status::OK(); + } + _prepared = true; + RETURN_IF_ERROR(VExpr::prepare(state, desc, context)); if (_slot_id == -1) { _prepare_finished = true; return Status::OK(); @@ -109,6 +127,27 @@ DataTypePtr VSlotRef::execute_type(const Block* block) const { return block->get_by_position(_column_id).type; } +Status VSlotRef::clone_node(VExprSPtr* cloned_expr) const { + DORIS_CHECK(cloned_expr != nullptr); + if (_column_id >= 0 && _column_name != nullptr) { + *cloned_expr = VSlotRef::create_shared(_slot_id, _column_id, _column_uniq_id, _data_type, + *_column_name); + return Status::OK(); + } + auto node = clone_texpr_node(); + TSlotRef slot_ref; + slot_ref.__set_slot_id(_slot_id); + node.__set_slot_ref(slot_ref); + node.__set_label(_column_label); + auto cloned = VSlotRef::create_shared(node); + auto* cloned_slot_ref = static_cast(cloned.get()); + cloned_slot_ref->_column_id = _column_id; + cloned_slot_ref->_column_uniq_id = _column_uniq_id; + cloned_slot_ref->_column_name = _column_name; + *cloned_expr = std::move(cloned); + return Status::OK(); +} + const std::string& VSlotRef::expr_name() const { return *_column_name; } diff --git a/be/src/exprs/vslot_ref.h b/be/src/exprs/vslot_ref.h index ef61edc384c2f2..a67bdc1953cd0a 100644 --- a/be/src/exprs/vslot_ref.h +++ b/be/src/exprs/vslot_ref.h @@ -31,12 +31,14 @@ class TExprNode; class Block; class VExprContext; -class VSlotRef MOCK_REMOVE(final) : public VExpr { +class VSlotRef : public VExpr { ENABLE_FACTORY_CREATOR(VSlotRef); public: VSlotRef(const TExprNode& node); VSlotRef(const SlotDescriptor* desc); + VSlotRef(int slot_id, int column_id, int column_uniq_id, const DataTypePtr& type, + std::string column_name); #ifdef BE_TEST VSlotRef() = default; void set_slot_id(int slot_id) { _slot_id = slot_id; } @@ -58,6 +60,7 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr { int column_id() const { return _column_id; } MOCK_FUNCTION int slot_id() const { return _slot_id; } + int column_uniq_id() const { return _column_uniq_id; } bool equals(const VExpr& other) override; @@ -67,16 +70,24 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr { column_ids.insert(_column_id); } - MOCK_FUNCTION const std::string& column_name() const { return *_column_name; } + virtual const std::string& column_name() const { return *_column_name; } uint64_t get_digest(uint64_t seed) const override; double execute_cost() const override { return 0.0; } + Status clone_node(VExprSPtr* cloned_expr) const override; + +protected: + VSlotRef(int slot_id, int column_id, int column_uniq_id) + : _slot_id(slot_id), _column_id(column_id), _column_uniq_id(column_uniq_id) { + _node_type = TExprNodeType::SLOT_REF; + } private: int _slot_id; int _column_id; int _column_uniq_id = -1; + std::string _owned_column_name; const std::string* _column_name = nullptr; const std::string _column_label; }; diff --git a/be/src/exprs/vtopn_pred.h b/be/src/exprs/vtopn_pred.h index 94887588f536da..a6edec65accd3d 100644 --- a/be/src/exprs/vtopn_pred.h +++ b/be/src/exprs/vtopn_pred.h @@ -63,6 +63,11 @@ class VTopNPred : public VExpr { } int source_node_id() const { return _source_node_id; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = VTopNPred::create_shared(clone_texpr_node(), _source_node_id, nullptr); + return Status::OK(); + } Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override { _predicate = &state->get_query_ctx()->get_runtime_predicate(_source_node_id); diff --git a/be/src/format/CMakeLists.txt b/be/src/format/CMakeLists.txt index ef9dab92c00f97..bc0325f3e0f252 100644 --- a/be/src/format/CMakeLists.txt +++ b/be/src/format/CMakeLists.txt @@ -22,6 +22,9 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/format") set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/format") file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp) +file(GLOB_RECURSE FORMAT_V2_SRC_FILES CONFIGURE_DEPENDS + ${CMAKE_CURRENT_SOURCE_DIR}/../format_v2/*.cpp) +list(APPEND SRC_FILES ${FORMAT_V2_SRC_FILES}) # Lance reader requires Rust static library (BUILD_RUST_READERS=ON) if (NOT BUILD_RUST_READERS) diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp index 3d1e978ffe911f..b8f0be49bfea1e 100644 --- a/be/src/format/csv/csv_reader.cpp +++ b/be/src/format/csv/csv_reader.cpp @@ -668,8 +668,8 @@ Status CsvReader::_create_file_reader(bool need_schema) { need_schema)); } else { _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0; - io::FileReaderOptions reader_options = - FileFactory::get_reader_options(_state, _file_description); + io::FileReaderOptions reader_options = FileFactory::get_reader_options( + _state ? _state->query_options() : _default_query_options, _file_description); io::FileReaderSPtr file_reader; if (_io_ctx_holder) { file_reader = DORIS_TRY(io::DelegateReader::create_file_reader( diff --git a/be/src/format/generic_reader.h b/be/src/format/generic_reader.h index d849d595056adb..88fc3fb85a0eb1 100644 --- a/be/src/format/generic_reader.h +++ b/be/src/format/generic_reader.h @@ -40,6 +40,7 @@ #include "runtime/runtime_state.h" #include "storage/predicate/block_column_predicate.h" #include "storage/segment/common.h" +#include "storage/segment/condition_cache.h" #include "util/profile_collector.h" namespace doris { @@ -51,16 +52,6 @@ namespace doris { class Block; class VSlotRef; -// Context passed from FileScanner to readers for condition cache integration. -// On MISS: readers populate filter_result per-granule during predicate evaluation. -// On HIT: readers skip granules where filter_result[granule] == false. -struct ConditionCacheContext { - bool is_hit = false; - std::shared_ptr> filter_result; // per-granule: true = has surviving rows - int64_t base_granule = 0; // global granule index of the first granule in filter_result - static constexpr int GRANULE_SIZE = 2048; -}; - /// Base context for the unified init_reader(ReaderInitContext*) template method. /// Contains fields shared by ALL reader types. Format-specific readers define /// subclasses (ParquetInitContext, OrcInitContext, etc.) with extra fields. @@ -299,6 +290,7 @@ class GenericReader : public ProfileCollector { // ---- get_columns cache ---- bool _get_columns_cached = false; std::unordered_map _cached_name_to_type; + const TQueryOptions _default_query_options; }; /// Provides an accessor for the current batch's row positions within the file. diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp index 8d53b6009e6bef..1aa19574b39a58 100644 --- a/be/src/format/json/new_json_reader.cpp +++ b/be/src/format/json/new_json_reader.cpp @@ -498,8 +498,8 @@ Status NewJsonReader::_open_file_reader(bool need_schema) { need_schema)); } else { _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0; - io::FileReaderOptions reader_options = - FileFactory::get_reader_options(_state, _file_description); + io::FileReaderOptions reader_options = FileFactory::get_reader_options( + _state ? _state->query_options() : _default_query_options, _file_description); io::FileReaderSPtr file_reader; if (_io_ctx_holder) { file_reader = DORIS_TRY(io::DelegateReader::create_file_reader( diff --git a/be/src/format/native/native_reader.cpp b/be/src/format/native/native_reader.cpp index 029d7ff2024f20..3632b6e4e0a1c9 100644 --- a/be/src/format/native/native_reader.cpp +++ b/be/src/format/native/native_reader.cpp @@ -137,8 +137,8 @@ Status NativeReader::init_reader() { _scan_params.broker_addresses.end()); } - io::FileReaderOptions reader_options = - FileFactory::get_reader_options(_state, file_description); + io::FileReaderOptions reader_options = FileFactory::get_reader_options( + _state ? _state->query_options() : _default_query_options, file_description); auto reader_res = _io_ctx_holder ? io::DelegateReader::create_file_reader( _profile, system_properties, file_description, reader_options, diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp index 54ced60d6e0361..80dc857ddf4bc3 100644 --- a/be/src/format/orc/vorc_reader.cpp +++ b/be/src/format/orc/vorc_reader.cpp @@ -382,8 +382,8 @@ Status OrcReader::_create_file_reader() { if (_file_input_stream == nullptr) { _file_description.mtime = _scan_range.__isset.modification_time ? _scan_range.modification_time : 0; - io::FileReaderOptions reader_options = - FileFactory::get_reader_options(_state, _file_description); + io::FileReaderOptions reader_options = FileFactory::get_reader_options( + _state ? _state->query_options() : _default_query_options, _file_description); io::FileReaderSPtr inner_reader; if (_io_ctx_holder != nullptr) { inner_reader = DORIS_TRY(io::DelegateReader::create_file_reader( diff --git a/be/src/format/parquet/vparquet_reader.cpp b/be/src/format/parquet/vparquet_reader.cpp index 48dd987c44f83a..11758c76410784 100644 --- a/be/src/format/parquet/vparquet_reader.cpp +++ b/be/src/format/parquet/vparquet_reader.cpp @@ -325,8 +325,8 @@ Status ParquetReader::_open_file() { ++_reader_statistics.open_file_num; _file_description.mtime = _scan_range.__isset.modification_time ? _scan_range.modification_time : 0; - io::FileReaderOptions reader_options = - FileFactory::get_reader_options(_state, _file_description); + io::FileReaderOptions reader_options = FileFactory::get_reader_options( + _state ? _state->query_options() : _default_query_options, _file_description); if (_io_ctx_holder) { _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader( _profile, _system_properties, _file_description, reader_options, diff --git a/be/src/format/table/deletion_vector_reader.cpp b/be/src/format/table/deletion_vector_reader.cpp index bfe34a5f555f94..d7e33c923d95b7 100644 --- a/be/src/format/table/deletion_vector_reader.cpp +++ b/be/src/format/table/deletion_vector_reader.cpp @@ -54,9 +54,9 @@ Status DeletionVectorReader::_create_file_reader() { return Status::EndOfFile("stop read."); } - _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0; + _file_description.mtime = _desc.modification_time; io::FileReaderOptions reader_options = - FileFactory::get_reader_options(_state, _file_description); + FileFactory::get_reader_options(_state->query_options(), _file_description); _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader( _profile, _system_properties, _file_description, reader_options, io::DelegateReader::AccessMode::RANDOM, _io_ctx)); @@ -64,20 +64,13 @@ Status DeletionVectorReader::_create_file_reader() { } void DeletionVectorReader::_init_file_description() { - _file_description.path = _range.path; - _file_description.file_size = _range.__isset.file_size ? _range.file_size : -1; - if (_range.__isset.fs_name) { - _file_description.fs_name = _range.fs_name; - } + _file_description.path = _desc.path; + _file_description.file_size = _desc.file_size; + _file_description.fs_name = _desc.fs_name; } void DeletionVectorReader::_init_system_properties() { - if (_range.__isset.file_type) { - // for compatibility - _system_properties.system_type = _range.file_type; - } else { - _system_properties.system_type = _params.file_type; - } + _system_properties.system_type = _params.file_type; _system_properties.properties = _params.properties; _system_properties.hdfs_params = _params.hdfs_params; if (_params.__isset.broker_addresses) { diff --git a/be/src/format/table/deletion_vector_reader.h b/be/src/format/table/deletion_vector_reader.h index 0663f3b28490ef..968344a8496bc7 100644 --- a/be/src/format/table/deletion_vector_reader.h +++ b/be/src/format/table/deletion_vector_reader.h @@ -36,6 +36,22 @@ struct IOContext; } // namespace io namespace doris { +struct DeleteFileDesc { + enum class Format { + PAIMON, + ICEBERG, + }; + + std::string key = ""; + std::string path = ""; + std::string fs_name = ""; + int64_t start_offset = 0; + int64_t size = 0; + int64_t file_size = -1; + int64_t modification_time = 0; + Format format = Format::PAIMON; +}; + class DeletionVectorReader { ENABLE_FACTORY_CREATOR(DeletionVectorReader); @@ -43,7 +59,22 @@ class DeletionVectorReader { DeletionVectorReader(RuntimeState* state, RuntimeProfile* profile, const TFileScanRangeParams& params, const TFileRangeDesc& range, io::IOContext* io_ctx) - : _state(state), _profile(profile), _range(range), _params(params), _io_ctx(io_ctx) {} + : _state(state), _profile(profile), _params(params), _io_ctx(io_ctx) { + _desc = DeleteFileDesc { + .key = "", + .path = range.path, + .fs_name = range.__isset.fs_name ? range.fs_name : "", + .start_offset = range.start_offset, + .size = range.size, + .file_size = range.__isset.file_size ? range.file_size : -1, + .modification_time = range.__isset.modification_time ? range.modification_time : 0}; + } + DeletionVectorReader(RuntimeState* state, RuntimeProfile* profile, + const TFileScanRangeParams& params, const DeleteFileDesc& desc, + io::IOContext* io_ctx) + : _state(state), _profile(profile), _params(params), _io_ctx(io_ctx) { + _desc = desc; + } ~DeletionVectorReader() = default; Status open(); Status read_at(size_t offset, Slice result); @@ -56,7 +87,7 @@ class DeletionVectorReader { private: RuntimeState* _state = nullptr; RuntimeProfile* _profile = nullptr; - const TFileRangeDesc& _range; + DeleteFileDesc _desc; const TFileScanRangeParams& _params; io::IOContext* _io_ctx = nullptr; diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h index bd049342195695..2bc15f18cf141a 100644 --- a/be/src/format/table/iceberg_reader_mixin.h +++ b/be/src/format/table/iceberg_reader_mixin.h @@ -343,9 +343,6 @@ class IcebergReaderMixin : public BaseReader, public TableSchemaChangeHelper { // id -> block column name std::unordered_map _id_to_block_column_name; - // File column names used during init - std::vector _file_col_names; - std::function()> _create_topn_row_id_column_iterator; diff --git a/be/src/format_v2/column_data.h b/be/src/format_v2/column_data.h new file mode 100644 index 00000000000000..7816ea8263cb42 --- /dev/null +++ b/be/src/format_v2/column_data.h @@ -0,0 +1,410 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/field.h" +#include "exprs/vexpr_fwd.h" + +namespace doris::format { + +// File-local top-level column id. +// +// Scope: +// - Only valid inside one physical file schema returned by FileReader::get_schema(). +// - For Parquet, this is the top-level field ordinal in the new reader schema. +// - The synthetic row-position column also uses this type, with a reserved negative id. +// +// Do not use this for table/global column unique ids, block positions, nested child ids, or +// slot ids. Nested child ids are carried by LocalColumnIndex::index below. +class LocalColumnId { +public: + constexpr LocalColumnId() = default; + explicit constexpr LocalColumnId(int32_t id) : _id(id) {} + + static constexpr LocalColumnId invalid() { return LocalColumnId(); } + + constexpr int32_t value() const { return _id; } + constexpr bool is_valid() const { return _id >= 0; } + + constexpr bool operator==(const LocalColumnId& other) const { return _id == other._id; } + constexpr bool operator!=(const LocalColumnId& other) const { return !(*this == other); } + constexpr bool operator<(const LocalColumnId& other) const { return _id < other._id; } + +private: + int32_t _id = -1; +}; + +// Position of a file-local column in the Block produced by one FileScanRequest. +// +// This is assigned by TableColumnMapper/TableReader after predicate/non-predicate columns are +// deduplicated. It is not a file schema id and it is not stable across requests. Use value() only +// at the boundary where an existing Block or expression API still expects a size_t/int position. +class LocalIndex { +public: + constexpr LocalIndex() = default; + explicit constexpr LocalIndex(size_t index) : _index(index) {} + + constexpr size_t value() const { return _index; } + constexpr bool operator==(const LocalIndex& other) const { return _index == other._index; } + constexpr bool operator<(const LocalIndex& other) const { return _index < other._index; } + +private: + size_t _index = 0; +}; + +// Position of a table/global output column in the final Block returned by TableReader. +// +// This type is reserved for boundaries that need to refer to caller-visible column order. It must +// not be used to index a file-local Block, because schema evolution and lazy materialization can +// make file-local order different from table output order. +class GlobalIndex { +public: + constexpr GlobalIndex() = default; + explicit constexpr GlobalIndex(size_t index) : _index(index) {} + + constexpr size_t value() const { return _index; } + constexpr bool operator==(const GlobalIndex& other) const { return _index == other._index; } + constexpr bool operator<(const GlobalIndex& other) const { return _index < other._index; } + +private: + size_t _index = 0; +}; + +// Index of a split-local constant/default value used to materialize columns that are not read from +// the physical file, such as partition columns, added columns with default values, and virtual +// table-format columns. +// +// It is separate from LocalIndex because constants do not occupy a position in the file reader +// output block unless an expression explicitly materializes them. +class ConstantIndex { +public: + constexpr ConstantIndex() = default; + explicit constexpr ConstantIndex(size_t index) : _index(index) {} + + constexpr size_t value() const { return _index; } + constexpr bool operator==(const ConstantIndex& other) const { return _index == other._index; } + constexpr bool operator<(const ConstantIndex& other) const { return _index < other._index; } + +private: + size_t _index = 0; +}; + +inline std::ostream& operator<<(std::ostream& os, const LocalColumnId& id) { + return os << id.value(); +} + +inline std::ostream& operator<<(std::ostream& os, const LocalIndex& index) { + return os << index.value(); +} + +inline std::ostream& operator<<(std::ostream& os, const GlobalIndex& index) { + return os << index.value(); +} + +inline std::ostream& operator<<(std::ostream& os, const ConstantIndex& index) { + return os << index.value(); +} + +// A split/file-local constant value used to materialize a table/global column without reading a +// physical file column. +// +// Common producers are partition values, schema-evolution default expressions, generated columns +// and table-format virtual columns. The entry is keyed by ConstantIndex in ConstantMap; global_index +// keeps the link back to the caller-visible output column. +struct ConstantEntry { + GlobalIndex global_index; + VExprContextSPtr expr; + DataTypePtr type; +}; + +// Per mapping/split collection of constants. +// +// ConstantIndex only has meaning within this container. Keeping constants separate from LocalIndex +// makes it explicit that these values do not occupy positions in the file reader output Block. +class ConstantMap { +public: + ConstantIndex add(ConstantEntry entry) { + const auto index = ConstantIndex(_entries.size()); + _entries.push_back(std::move(entry)); + return index; + } + + const ConstantEntry& get(ConstantIndex index) const { + DORIS_CHECK(index.value() < _entries.size()); + return _entries[index.value()]; + } + + void clear() { _entries.clear(); } + bool empty() const { return _entries.empty(); } + size_t size() const { return _entries.size(); } + + const std::vector& entries() const { return _entries; } + +private: + std::vector _entries; +}; + +// Target of a localized filter. +// +// A filter can either reference a file-local Block position or a constant entry. Unset entries mean +// the filter cannot be evaluated below the table-reader finalize stage. +struct FilterEntry { + enum class Kind { + UNSET, + LOCAL, + CONSTANT, + }; + + static FilterEntry local(LocalIndex index) { + return {.kind = Kind::LOCAL, .index = index.value()}; + } + + static FilterEntry constant(ConstantIndex index) { + return {.kind = Kind::CONSTANT, .index = index.value()}; + } + + bool is_set() const { return kind != Kind::UNSET; } + bool is_local() const { return kind == Kind::LOCAL; } + bool is_constant() const { return kind == Kind::CONSTANT; } + + LocalIndex local_index() const { + DORIS_CHECK(is_local()); + return LocalIndex(index); + } + + ConstantIndex constant_index() const { + DORIS_CHECK(is_constant()); + return ConstantIndex(index); + } + + Kind kind = Kind::UNSET; + size_t index = 0; +}; + +enum ColumnType { + DATA_COLUMN = 0, // normal data column + ROW_NUMBER = 1, // row number in a file + GLOBAL_ROWID = 2, // global unique row id across files, used by TopN filter +}; + +struct GlobalRowIdContext { + uint8_t version = 0; + int64_t backend_id = 0; + uint32_t file_id = 0; +}; + +// Column schema definition shared by table/global projection and file-local schema matching. +// +// ColumnDefinition intentionally carries schema identity only. FE column unique ids are translated +// to GlobalIndex at the FileScannerV2 boundary and must not appear in table/file reader APIs. +struct ColumnDefinition { + // Typed identifier value used to match a column against another schema. + // + // - TYPE_NULL: no explicit identifier. BY_NAME falls back to ColumnDefinition::name. + // - TYPE_INT: interpreted by TableColumnMapperOptions::mode as a field id or file position. + // - TYPE_STRING: explicit name identifier. + // + // This is not the id that FileReader uses to read data. For example, a Parquet column can be + // matched by its optional Parquet field_id, while the reader still addresses it by a file-local + // ordinal. + Field identifier; + // Reader-local id of this node inside the file schema returned by FileReader::get_schema(). + // Top-level fields use the root column ordinal and nested fields use the child ordinal under + // their parent. -1 means unset; special virtual file columns may use other negative ids. + // Table/global ColumnDefinition values can leave this as -1 because they are not read directly + // by a FileReader. + int32_t local_id = -1; + // Logical table column name. This is also the matching name for by-name file formats. + std::string name; + // Historical or external names for the same logical field. Table formats such as Iceberg can + // use this to resolve partition path keys after column rename. + std::vector name_mapping {}; + DataTypePtr type; + // Semantic nested children for this schema node. + // + // Table/global columns carry projected table children. File-local schemas returned by + // FileReader::get_schema() also expose semantic children, not physical reader wrappers. For + // example, MAP children are key/value and ARRAY children contain only the element field. + std::vector children {}; + // Expression used to materialize missing/default/generated values when the column is not read + // directly from the file. + VExprContextSPtr default_expr = nullptr; + // Partition columns are constants from split metadata and should not be matched against file + // schema unless table-format logic explicitly asks for it. + bool is_partition_key = false; + // File-local column kind. For table/global columns this remains DATA_COLUMN. + ColumnType column_type = ColumnType::DATA_COLUMN; + + bool has_identifier() const { return !identifier.is_null(); } + bool has_identifier_field_id() const { return identifier.get_type() == TYPE_INT; } + bool has_identifier_name() const { return identifier.get_type() == TYPE_STRING; } + + // DuckDB-style helper for BY_FIELD_ID matching. The mapper binds the matching mode once, so a + // TYPE_INT identifier is interpreted as a field id only by the field-id matcher. + int32_t get_identifier_field_id() const { + DORIS_CHECK(has_identifier_field_id()); + return identifier.get(); + } + // DuckDB-style helper for BY_NAME matching. When no explicit string identifier is present, the + // logical column name is the identifier. + const std::string& get_identifier_name() const { + if (identifier.is_null()) { + return name; + } + DORIS_CHECK(has_identifier_name()); + return identifier.get(); + } + // Helper for BY_INDEX matching. BY_INDEX reuses the TYPE_INT identifier as the table-side file + // position, matching DuckDB's typed identifier plus mapper-mode interpretation. + int32_t get_identifier_position() const { + DORIS_CHECK(has_identifier_field_id()); + return identifier.get(); + } + + // Helper for reader-local projection and scan requests. + int32_t file_local_id() const { + if (local_id != -1) { + return local_id; + } + return get_identifier_field_id(); + } + + std::string debug_string() const; +}; + +static constexpr int ROW_POSITION_COLUMN_ID = -10001; +static constexpr const char* ROW_POSITION_COLUMN_NAME = "__file_row_position"; +static constexpr int GLOBAL_ROWID_COLUMN_ID = -10002; + +inline ColumnDefinition row_position_column_definition() { + ColumnDefinition field; + field.identifier = Field::create_field(ROW_POSITION_COLUMN_ID); + field.local_id = ROW_POSITION_COLUMN_ID; + field.name = ROW_POSITION_COLUMN_NAME; + field.type = std::make_shared(); + field.column_type = ColumnType::ROW_NUMBER; + return field; +} + +inline ColumnDefinition global_rowid_column_definition() { + ColumnDefinition field; + field.identifier = Field::create_field(BeConsts::GLOBAL_ROWID_COL); + field.local_id = GLOBAL_ROWID_COLUMN_ID; + field.name = BeConsts::GLOBAL_ROWID_COL; + field.type = std::make_shared(); + field.column_type = ColumnType::GLOBAL_ROWID; + return field; +} + +// Recursive file-local projection path. +// +// For a root entry in FileScanRequest::{predicate_columns, non_predicate_columns}, index is the +// top-level file column id and column_id() is valid. For children, index is the file-local child id +// under the parent node. This is the reader schema local id, not an Iceberg/Parquet field id, not a +// table child id, and not a child output ordinal. +// +// project_all_children=true means the whole subtree under this node is needed. When false, children +// lists the selected child paths. File readers can use this to avoid constructing readers for +// unprojected nested children. +struct LocalColumnIndex { + int32_t index = -1; + bool project_all_children = true; + std::vector children {}; + + static LocalColumnIndex top_level(LocalColumnId column_id) { + return {.index = column_id.value()}; + } + + static LocalColumnIndex local(int32_t local_id) { return {.index = local_id}; } + + static LocalColumnIndex partial_local(int32_t local_id) { + return {.index = local_id, .project_all_children = false}; + } + + LocalColumnId column_id() const { return LocalColumnId(index); } + int32_t local_id() const { return index; } + std::string debug_string() const; +}; + +inline bool is_full_projection(const LocalColumnIndex* projection) { + return projection == nullptr || projection->project_all_children; +} + +inline bool is_partial_projection(const LocalColumnIndex* projection) { + return projection != nullptr && !projection->project_all_children; +} + +inline const LocalColumnIndex* find_child_projection(const LocalColumnIndex* projection, + int32_t local_id) { + if (is_full_projection(projection)) { + return nullptr; + } + const auto child_it = std::find_if( + projection->children.begin(), projection->children.end(), + [&](const LocalColumnIndex& child) { return child.local_id() == local_id; }); + return child_it == projection->children.end() ? nullptr : &*child_it; +} + +inline bool is_child_projected(const LocalColumnIndex* projection, int32_t local_id) { + return is_full_projection(projection) || find_child_projection(projection, local_id) != nullptr; +} + +// Merge two projection trees that point to the same file-local node. +// +// A full projection dominates a partial projection. Two partial projections are merged by child id +// and recursively union their child paths. The caller must only merge projections for the same +// root/child node. +inline Status merge_local_column_index(LocalColumnIndex* target, const LocalColumnIndex& source) { + DORIS_CHECK(target != nullptr); + DORIS_CHECK(target->index == source.index); + if (target->project_all_children) { + return Status::OK(); + } + if (source.project_all_children) { + target->project_all_children = true; + target->children.clear(); + return Status::OK(); + } + for (const auto& source_child : source.children) { + auto target_child_it = std::find_if( + target->children.begin(), target->children.end(), + [&](const LocalColumnIndex& child) { return child.index == source_child.index; }); + if (target_child_it == target->children.end()) { + target->children.push_back(source_child); + continue; + } + RETURN_IF_ERROR(merge_local_column_index(&*target_child_it, source_child)); + } + return Status::OK(); +} + +} // namespace doris::format diff --git a/be/src/format_v2/column_mapper.cpp b/be/src/format_v2/column_mapper.cpp new file mode 100644 index 00000000000000..e6a0e1a28e7422 --- /dev/null +++ b/be/src/format_v2/column_mapper.cpp @@ -0,0 +1,2029 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/column_mapper.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "common/exception.h" +#include "common/status.h" +#include "core/data_type/convert_field_to_type.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "core/data_type/primitive_type.h" +#include "exprs/runtime_filter_expr.h" +#include "exprs/short_circuit_evaluation_expr.h" +#include "exprs/vcase_expr.h" +#include "exprs/vcast_expr.h" +#include "exprs/vcondition_expr.h" +#include "exprs/vectorized_fn_call.h" +#include "exprs/vexpr_context.h" +#include "exprs/vin_predicate.h" +#include "exprs/vliteral.h" +#include "format_v2/column_mapper_nested.h" +#include "format_v2/expr/cast.h" +#include "format_v2/file_reader.h" +#include "format_v2/schema_projection.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/Exprs_types.h" + +namespace doris::format { + +namespace { + +std::string mapping_mode_to_string(TableColumnMappingMode mode) { + switch (mode) { + case TableColumnMappingMode::BY_FIELD_ID: + return "BY_FIELD_ID"; + case TableColumnMappingMode::BY_NAME: + return "BY_NAME"; + case TableColumnMappingMode::BY_INDEX: + return "BY_INDEX"; + } + return "UNKNOWN"; +} + +bool column_has_name(const ColumnDefinition& column, const std::string& name) { + if (to_lower(column.name) == to_lower(name)) { + return true; + } + if (column.has_identifier_name() && to_lower(column.get_identifier_name()) == to_lower(name)) { + return true; + } + return std::ranges::any_of(column.name_mapping, [&](const std::string& alias) { + return to_lower(alias) == to_lower(name); + }); +} + +bool column_names_match(const ColumnDefinition& lhs, const ColumnDefinition& rhs) { + if (column_has_name(rhs, lhs.name)) { + return true; + } + if (lhs.has_identifier_name() && column_has_name(rhs, lhs.get_identifier_name())) { + return true; + } + return std::ranges::any_of(lhs.name_mapping, [&](const std::string& alias) { + return column_has_name(rhs, alias); + }); +} + +class ColumnMatcher { +public: + virtual ~ColumnMatcher() = default; + virtual const ColumnDefinition* find( + const ColumnDefinition& table_column, + const std::vector& file_schema) const = 0; +}; + +class FieldIdMatcher final : public ColumnMatcher { +public: + const ColumnDefinition* find(const ColumnDefinition& table_column, + const std::vector& file_schema) const override { + if (!table_column.has_identifier_field_id()) { + return nullptr; + } + const auto field_id = table_column.get_identifier_field_id(); + const auto field_it = std::ranges::find_if(file_schema, [&](const ColumnDefinition& field) { + return field.has_identifier_field_id() && field.get_identifier_field_id() == field_id; + }); + return field_it == file_schema.end() ? nullptr : &*field_it; + } +}; + +class NameMatcher final : public ColumnMatcher { +public: + const ColumnDefinition* find(const ColumnDefinition& table_column, + const std::vector& file_schema) const override { + const auto field_it = std::ranges::find_if(file_schema, [&](const ColumnDefinition& field) { + return column_names_match(table_column, field); + }); + return field_it == file_schema.end() ? nullptr : &*field_it; + } +}; + +class PositionMatcher final : public ColumnMatcher { +public: + const ColumnDefinition* find(const ColumnDefinition& table_column, + const std::vector& file_schema) const override { + if (!table_column.has_identifier_field_id()) { + return nullptr; + } + const auto position = table_column.get_identifier_position(); + if (position < 0 || static_cast(position) >= file_schema.size()) { + return nullptr; + } + return &file_schema[static_cast(position)]; + } +}; + +const ColumnMatcher& matcher_for_mode(TableColumnMappingMode mode) { + static const FieldIdMatcher field_id_matcher; + static const NameMatcher name_matcher; + static const PositionMatcher position_matcher; + switch (mode) { + case TableColumnMappingMode::BY_FIELD_ID: + return field_id_matcher; + case TableColumnMappingMode::BY_NAME: + return name_matcher; + case TableColumnMappingMode::BY_INDEX: + return position_matcher; + } + return field_id_matcher; +} + +std::string virtual_column_type_to_string(TableVirtualColumnType type) { + switch (type) { + case TableVirtualColumnType::INVALID: + return "INVALID"; + case TableVirtualColumnType::ROW_ID: + return "ROW_ID"; + case TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER: + return "LAST_UPDATED_SEQUENCE_NUMBER"; + case TableVirtualColumnType::ICEBERG_ROWID: + return "ICEBERG_ROWID"; + } + return "UNKNOWN"; +} + +std::string filter_conversion_type_to_string(FilterConversionType type) { + switch (type) { + case FilterConversionType::COPY_DIRECTLY: + return "COPY_DIRECTLY"; + case FilterConversionType::CAST_FILTER: + return "CAST_FILTER"; + case FilterConversionType::READER_EXPRESSION: + return "READER_EXPRESSION"; + case FilterConversionType::FINALIZE_ONLY: + return "FINALIZE_ONLY"; + case FilterConversionType::CONSTANT: + return "CONSTANT"; + } + return "UNKNOWN"; +} + +std::string data_type_debug_string(const DataTypePtr& type) { + return type == nullptr ? "null" : type->get_name(); +} + +std::string field_debug_string(const Field& field) { + std::ostringstream out; + out << "Field{type=" << type_to_string(field.get_type()) << ", value="; + switch (field.get_type()) { + case TYPE_NULL: + out << "null"; + break; + case TYPE_INT: + out << field.get(); + break; + case TYPE_BIGINT: + out << field.get(); + break; + case TYPE_STRING: + out << field.get(); + break; + default: + out << field.to_debug_string(0); + break; + } + out << "}"; + return out.str(); +} + +template +std::string join_debug_strings(const std::vector& values, Formatter formatter) { + std::ostringstream out; + out << "["; + for (size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + out << ", "; + } + out << formatter(values[i]); + } + out << "]"; + return out.str(); +} + +} // namespace + +const Field* find_partition_value(const ColumnDefinition& table_column, + const std::map& partition_values) { + const auto find_by_name = [&](const std::string& name) -> const Field* { + const auto value_it = partition_values.find(name); + return value_it == partition_values.end() ? nullptr : &value_it->second; + }; + if (const auto* value = find_by_name(table_column.name); value != nullptr) { + return value; + } + if (table_column.has_identifier_name()) { + if (const auto* value = find_by_name(table_column.get_identifier_name()); + value != nullptr) { + return value; + } + } + for (const auto& alias : table_column.name_mapping) { + if (const auto* value = find_by_name(alias); value != nullptr) { + return value; + } + } + return nullptr; +} + +struct FileSlotRewriteInfo { + size_t block_position = 0; + DataTypePtr file_type; + DataTypePtr table_type; + std::string file_column_name; +}; + +struct RewriteContext { + RuntimeState* runtime_state = nullptr; + std::vector created_exprs {}; + + void add_created_expr(VExprSPtr expr) { created_exprs.push_back(std::move(expr)); } + + Status prepare_created_exprs(VExprContext* context) const { + DORIS_CHECK(context != nullptr); + RowDescriptor row_desc; + for (const auto& expr : created_exprs) { + if (dynamic_cast(expr.get()) != nullptr && runtime_state == nullptr) { + return Status::InvalidArgument( + "RuntimeState is required to prepare rewritten cast expression {}", + expr->expr_name()); + } + RETURN_IF_ERROR(expr->prepare(runtime_state, row_desc, context)); + } + return Status::OK(); + } +}; + +static VExprSPtr create_file_slot_ref(const VSlotRef& slot_ref, + const FileSlotRewriteInfo& rewrite_info, + RewriteContext* rewrite_context) { + auto ref = + VSlotRef::create_shared(slot_ref.slot_id(), cast_set(rewrite_info.block_position), + -1, rewrite_info.file_type, rewrite_info.file_column_name); + rewrite_context->add_created_expr(ref); + return ref; +} + +static bool is_cast_expr(const VExprSPtr& expr) { + return dynamic_cast(expr.get()) != nullptr; +} + +static bool is_binary_comparison_predicate(const VExprSPtr& expr) { + if (expr == nullptr || expr->get_num_children() != 2 || + (expr->node_type() != TExprNodeType::BINARY_PRED && + expr->node_type() != TExprNodeType::NULL_AWARE_BINARY_PRED)) { + return false; + } + switch (expr->op()) { + case TExprOpcode::EQ: + case TExprOpcode::EQ_FOR_NULL: + case TExprOpcode::NE: + case TExprOpcode::GE: + case TExprOpcode::GT: + case TExprOpcode::LE: + case TExprOpcode::LT: + return true; + default: + return false; + } +} + +std::string TableColumnMapperOptions::debug_string() const { + std::ostringstream out; + out << "TableColumnMapperOptions{mode=" << mapping_mode_to_string(mode) << "}"; + return out.str(); +} + +std::string ColumnDefinition::debug_string() const { + std::ostringstream out; + out << "ColumnDefinition{name=" << name << ", identifier=" << field_debug_string(identifier) + << ", name_mapping=" + << join_debug_strings(name_mapping, [](const std::string& name) { return name; }) + << ", local_id=" << local_id << ", type=" << data_type_debug_string(type) << ", children=" + << join_debug_strings(children, + [](const ColumnDefinition& child) { return child.debug_string(); }) + << ", has_default_expr=" << (default_expr != nullptr) + << ", is_partition_key=" << is_partition_key << "}"; + return out.str(); +} + +std::string LocalColumnIndex::debug_string() const { + std::ostringstream out; + out << "LocalColumnIndex{index=" << index << ", project_all_children=" << project_all_children + << ", children=" + << join_debug_strings(children, + [](const LocalColumnIndex& child) { return child.debug_string(); }) + << "}"; + return out.str(); +} + +std::string ColumnMapping::debug_string() const { + std::ostringstream out; + out << "ColumnMapping{global_index=" << global_index + << ", table_column_name=" << table_column_name << ", file_local_id="; + if (file_local_id.has_value()) { + out << *file_local_id; + } else { + out << "null"; + } + out << ", constant_index="; + if (constant_index.has_value()) { + out << *constant_index; + } else { + out << "null"; + } + out << ", file_column_name=" << file_column_name + << ", original_file_type=" << data_type_debug_string(original_file_type) + << ", original_file_children=" + << join_debug_strings(original_file_children, + [](const ColumnDefinition& child) { return child.debug_string(); }) + << ", file_type=" << data_type_debug_string(file_type) + << ", table_type=" << data_type_debug_string(table_type) + << ", has_projection=" << (projection != nullptr) << ", child_mappings=" + << join_debug_strings(child_mappings, + [](const ColumnMapping& child) { return child.debug_string(); }) + << ", is_trivial=" << is_trivial << ", is_constant=" << constant_index.has_value() + << ", filter_conversion=" << filter_conversion_type_to_string(filter_conversion) + << ", virtual_column_type=" << virtual_column_type_to_string(virtual_column_type) + << ", has_default_expr=" << (default_expr != nullptr) << "}"; + return out.str(); +} + +std::string TableColumnMapper::debug_string() const { + std::ostringstream out; + out << "TableColumnMapper{options=" << _options.debug_string() << ", mappings=" + << join_debug_strings(_mappings, + [](const ColumnMapping& mapping) { return mapping.debug_string(); }) + << ", hidden_mappings=" + << join_debug_strings(_hidden_mappings, + [](const ColumnMapping& mapping) { return mapping.debug_string(); }) + << ", constant_count=" << _constant_map.size() << "}"; + return out.str(); +} + +static const FileSlotRewriteInfo* find_slot_rewrite_info( + const VExprSPtr& expr, + const std::map& global_to_file_slot, + const VSlotRef** slot_ref) { + if (expr == nullptr) { + return nullptr; + } + VExprSPtr slot_expr = expr; + const bool input_is_cast = is_cast_expr(expr) && expr->get_num_children() == 1; + if (is_cast_expr(expr) && expr->get_num_children() == 1) { + slot_expr = expr->children()[0]; + } + if (!slot_expr->is_slot_ref()) { + return nullptr; + } + const auto* candidate_slot_ref = assert_cast(slot_expr.get()); + const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*candidate_slot_ref)); + if (rewrite_it == global_to_file_slot.end()) { + return nullptr; + } + if (input_is_cast && !expr->data_type()->equals(*rewrite_it->second.table_type)) { + return nullptr; + } + if (slot_ref != nullptr) { + *slot_ref = candidate_slot_ref; + } + return &rewrite_it->second; +} + +static bool filter_conversion_has_local_source(FilterConversionType conversion) { + switch (conversion) { + case FilterConversionType::COPY_DIRECTLY: + case FilterConversionType::CAST_FILTER: + case FilterConversionType::READER_EXPRESSION: + return true; + case FilterConversionType::FINALIZE_ONLY: + case FilterConversionType::CONSTANT: + return false; + } + return false; +} + +static bool column_predicate_can_use_local_source(FilterConversionType conversion) { + switch (conversion) { + case FilterConversionType::COPY_DIRECTLY: + return true; + case FilterConversionType::CAST_FILTER: + case FilterConversionType::READER_EXPRESSION: + case FilterConversionType::FINALIZE_ONLY: + case FilterConversionType::CONSTANT: + return false; + } + return false; +} + +static bool table_filter_has_only_local_entries( + const TableFilter& table_filter, const std::map& filter_entries) { + for (const auto global_index : table_filter.global_indices) { + const auto entry_it = filter_entries.find(global_index); + if (entry_it == filter_entries.end() || !entry_it->second.is_local()) { + return false; + } + } + return true; +} + +static VExprSPtr unwrap_literal_for_file_cast(const VExprSPtr& expr, + const DataTypePtr& table_type) { + if (expr == nullptr) { + return nullptr; + } + if (expr->is_literal()) { + return expr; + } + if (is_cast_expr(expr) && expr->get_num_children() == 1 && expr->children()[0]->is_literal() && + expr->children()[0]->data_type()->equals(*table_type)) { + return expr->children()[0]; + } + return nullptr; +} + +static Field literal_field_from_expr(const VExpr& literal_expr) { + DORIS_CHECK(literal_expr.is_literal()); + const auto* literal = dynamic_cast(&literal_expr); + DORIS_CHECK(literal != nullptr); + Field field; + literal->get_column_ptr()->get(0, field); + return field; +} + +// Table filter localization clones an already-prepared table expr and then rewrites it to file +// slots. Only split-local literals and BE cast nodes need table-reader-specific clone behavior; +// plain slot refs and literals use their own VExpr::clone_node(). +static Status clone_table_expr_node(const VExpr& expr, VExprSPtr* cloned_expr) { + DORIS_CHECK(cloned_expr != nullptr); + if (const auto* split_literal = dynamic_cast(&expr)) { + *cloned_expr = std::make_shared( + split_literal->data_type(), literal_field_from_expr(expr), + split_literal->original_type(), split_literal->original_field()); + } else if (const auto* vcast_expr = dynamic_cast(&expr); + vcast_expr != nullptr && vcast_expr->node_type() == TExprNodeType::CAST_EXPR) { + *cloned_expr = Cast::create_shared(vcast_expr->data_type()); + } + return Status::OK(); +} + +Status clone_table_expr_tree(const VExprSPtr& expr, VExprSPtr* cloned_expr) { + DORIS_CHECK(cloned_expr != nullptr); + if (expr == nullptr) { + *cloned_expr = nullptr; + return Status::OK(); + } + return expr->deep_clone(cloned_expr, clone_table_expr_node); +} + +static VExprSPtr original_table_literal(const VExprSPtr& literal_expr, + RewriteContext* rewrite_context = nullptr) { + DORIS_CHECK(literal_expr != nullptr); + DORIS_CHECK(literal_expr->is_literal()); + const auto* rewritten_literal = dynamic_cast(literal_expr.get()); + if (rewritten_literal == nullptr) { + return literal_expr; + } + auto literal = VLiteral::create_shared(rewritten_literal->original_type(), + rewritten_literal->original_field()); + if (rewrite_context != nullptr) { + rewrite_context->add_created_expr(literal); + } + return literal; +} + +static ColumnDefinition hidden_column_from_slot_ref(const VSlotRef& slot_ref) { + ColumnDefinition column; + column.name = slot_ref.column_name(); + column.identifier = Field::create_field(column.name); + column.type = slot_ref.data_type(); + return column; +} + +static void collect_top_level_slot_columns(const VExprSPtr& expr, + std::map* columns) { + DORIS_CHECK(columns != nullptr); + if (expr == nullptr) { + return; + } + if (expr->is_slot_ref()) { + const auto* slot_ref = assert_cast(expr.get()); + columns->try_emplace(slot_ref_global_index(*slot_ref), + hidden_column_from_slot_ref(*slot_ref)); + return; + } + for (const auto& child : expr->children()) { + collect_top_level_slot_columns(child, columns); + } +} + +static VExprSPtr rewrite_literal_to_file_type(const VExprSPtr& literal_expr, + const FileSlotRewriteInfo& rewrite_info, + RewriteContext* rewrite_context) { + DORIS_CHECK(literal_expr != nullptr); + DORIS_CHECK(literal_expr->is_literal()); + const auto original_literal = original_table_literal(literal_expr, rewrite_context); + const Field original_field = literal_field(original_literal); + if (rewrite_info.file_type->equals(*original_literal->data_type())) { + return original_literal; + } + Field file_field; + try { + convert_field_to_type(original_field, *rewrite_info.file_type, &file_field, + original_literal->data_type().get()); + } catch (const Exception&) { + return nullptr; + } + if (file_field.is_null()) { + return nullptr; + } + if (file_field.get_type() != remove_nullable(rewrite_info.file_type)->get_primitive_type()) { + return nullptr; + } + auto literal = std::make_shared( + rewrite_info.file_type, file_field, original_literal->data_type(), original_field); + rewrite_context->add_created_expr(literal); + return literal; +} + +static bool rewrite_binary_slot_literal_predicate( + const VExprSPtr& expr, + const std::map& global_to_file_slot, + RewriteContext* rewrite_context) { + if (!is_binary_comparison_predicate(expr)) { + return false; + } + auto children = expr->children(); + const VSlotRef* slot_ref = nullptr; + const FileSlotRewriteInfo* rewrite_info = + find_slot_rewrite_info(children[0], global_to_file_slot, &slot_ref); + int slot_child_idx = 0; + int literal_child_idx = 1; + if (rewrite_info == nullptr) { + rewrite_info = find_slot_rewrite_info(children[1], global_to_file_slot, &slot_ref); + slot_child_idx = 1; + literal_child_idx = 0; + } + if (rewrite_info == nullptr || slot_ref == nullptr) { + return false; + } + auto literal_expr = + unwrap_literal_for_file_cast(children[literal_child_idx], rewrite_info->table_type); + if (literal_expr == nullptr) { + return false; + } + + auto rewritten_literal = + rewrite_literal_to_file_type(literal_expr, *rewrite_info, rewrite_context); + if (rewritten_literal == nullptr) { + children[literal_child_idx] = original_table_literal(literal_expr, rewrite_context); + expr->set_children(std::move(children)); + return false; + } + + children[slot_child_idx] = create_file_slot_ref(*slot_ref, *rewrite_info, rewrite_context); + children[literal_child_idx] = std::move(rewritten_literal); + expr->set_children(std::move(children)); + return true; +} + +static bool rewrite_in_slot_literal_predicate( + const VExprSPtr& expr, + const std::map& global_to_file_slot, + RewriteContext* rewrite_context) { + if (expr->node_type() != TExprNodeType::IN_PRED || expr->get_num_children() < 2) { + return false; + } + auto children = expr->children(); + const VSlotRef* slot_ref = nullptr; + const FileSlotRewriteInfo* rewrite_info = + find_slot_rewrite_info(children[0], global_to_file_slot, &slot_ref); + if (rewrite_info == nullptr || slot_ref == nullptr) { + return false; + } + + VExprSPtrs rewritten_literals; + rewritten_literals.reserve(children.size() - 1); + for (size_t child_idx = 1; child_idx < children.size(); ++child_idx) { + auto literal_expr = + unwrap_literal_for_file_cast(children[child_idx], rewrite_info->table_type); + if (literal_expr == nullptr) { + return false; + } + auto rewritten_literal = + rewrite_literal_to_file_type(literal_expr, *rewrite_info, rewrite_context); + if (rewritten_literal == nullptr) { + for (size_t restore_idx = 1; restore_idx < children.size(); ++restore_idx) { + auto restore_literal = unwrap_literal_for_file_cast(children[restore_idx], + rewrite_info->table_type); + if (restore_literal != nullptr) { + children[restore_idx] = + original_table_literal(restore_literal, rewrite_context); + } + } + expr->set_children(std::move(children)); + return false; + } + rewritten_literals.push_back(std::move(rewritten_literal)); + } + + children[0] = create_file_slot_ref(*slot_ref, *rewrite_info, rewrite_context); + for (size_t literal_idx = 0; literal_idx < rewritten_literals.size(); ++literal_idx) { + children[literal_idx + 1] = std::move(rewritten_literals[literal_idx]); + } + expr->set_children(std::move(children)); + return true; +} + +static VExprSPtr create_file_struct_child_name_literal(const std::string& file_child_name, + RewriteContext* rewrite_context) { + auto literal = VLiteral::create_shared(std::make_shared(), + Field::create_field(file_child_name)); + rewrite_context->add_created_expr(literal); + return literal; +} + +static bool needs_complex_file_slot_cast(const DataTypePtr& file_type, + const DataTypePtr& table_type) { + if (file_type == nullptr || table_type == nullptr || file_type->equals(*table_type)) { + return false; + } + const auto file_nested_type = remove_nullable(file_type); + const auto table_nested_type = remove_nullable(table_type); + if (file_nested_type->equals(*table_nested_type)) { + return false; + } + return is_complex_type(file_nested_type->get_primitive_type()) || + is_complex_type(table_nested_type->get_primitive_type()); +} + +static bool collect_struct_element_chain(const VExprSPtr& expr, std::vector* chain) { + DORIS_CHECK(chain != nullptr); + if (!is_struct_element_expr(expr)) { + return false; + } + const auto& parent = expr->children()[0]; + if (is_struct_element_expr(parent)) { + if (!collect_struct_element_chain(parent, chain)) { + return false; + } + } else if (!parent->is_slot_ref()) { + // Only support file-local rewrite for struct child chains rooted directly at a top-level + // slot, for example `element_at(s, 'a')` or `element_at(element_at(s, 'a'), 'b')`. + // + // Do not localize computed complex parents such as + // `element_at(element_at(map_values(m), 1), 'full_name')`. The intermediate map/array + // result has already been reshaped by scan projection and may have a different child order + // from the table expression. Partially rewriting that expression against the file block can + // silently evaluate the wrong struct child and filter out valid rows. Those predicates must + // remain as table-level conjuncts and be evaluated after TableReader materialization. + return false; + } + chain->push_back(expr); + return true; +} + +static bool rewrite_struct_element_path_to_file_expr( + const VExprSPtr& expr, const std::vector& mappings, + const std::map& global_to_file_slot, + RewriteContext* rewrite_context) { + ResolvedNestedStructPath resolved; + if (!resolve_nested_struct_expr_for_file(expr, mappings, &resolved)) { + return false; + } + + std::vector struct_element_chain; + if (!collect_struct_element_chain(expr, &struct_element_chain) || + struct_element_chain.size() != resolved.file_child_names.size() || + struct_element_chain.size() != resolved.file_child_types.size()) { + return false; + } + + auto root_children = struct_element_chain.front()->children(); + if (!root_children[0]->is_slot_ref()) { + return false; + } + const auto* slot_ref = assert_cast(root_children[0].get()); + const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref)); + if (rewrite_it == global_to_file_slot.end()) { + return false; + } + + // File-local conjuncts are prepared against the file-reader Block, so both the root slot and + // every struct selector must be expressed in file schema terms. For a renamed Iceberg field, + // keeping the table selector would prepare `element_at(file_struct, 'renamed')` and + // fail before any rows are read. Rewrite the whole chain while ColumnMapping still preserves + // the table-to-file relationship. Example: + // table filter: element_at(element_at(s, 'renamed_parent'), 'renamed_leaf') + // old file: s> + // file filter: element_at(element_at(s, 'parent'), 'leaf') + root_children[0] = create_file_slot_ref(*slot_ref, rewrite_it->second, rewrite_context); + struct_element_chain.front()->set_children(std::move(root_children)); + for (size_t idx = 0; idx < struct_element_chain.size(); ++idx) { + auto children = struct_element_chain[idx]->children(); + children[1] = create_file_struct_child_name_literal(resolved.file_child_names[idx], + rewrite_context); + struct_element_chain[idx]->set_children(std::move(children)); + // The selector name and the expression return type must be moved to file schema together. + // Example: + // table filter: element_at(element_at(s, 'new_a'), 'new_aa') = 50 + // old file: s.new_a STRUCT + // file filter: element_at(element_at(s, 'new_a'), 'aa') = 50 + // + // If the inner element_at keeps the table return type STRUCT, preparing the + // outer element_at(..., 'aa') fails before scanning because `aa` is not a table field. + struct_element_chain[idx]->data_type() = resolved.file_child_types[idx]; + } + return true; +} + +static VExprSPtr rewrite_table_expr_to_file_expr( + const VExprSPtr& expr, + const std::map& global_to_file_slot, + const std::vector& filter_mappings, RewriteContext* rewrite_context, + bool* can_localize) { + if (expr == nullptr) { + return nullptr; + } + DORIS_CHECK(rewrite_context != nullptr); + DORIS_CHECK(can_localize != nullptr); + if (auto* runtime_filter = dynamic_cast(expr.get()); + runtime_filter != nullptr) { + auto impl = runtime_filter->get_impl(); + if (impl == nullptr) { + *can_localize = false; + return expr; + } + auto localized_impl = rewrite_table_expr_to_file_expr( + impl, global_to_file_slot, filter_mappings, rewrite_context, can_localize); + if (!*can_localize) { + return expr; + } + runtime_filter->set_impl(std::move(localized_impl)); + return expr; + } + if (rewrite_binary_slot_literal_predicate(expr, global_to_file_slot, rewrite_context)) { + return expr; + } + if (rewrite_in_slot_literal_predicate(expr, global_to_file_slot, rewrite_context)) { + return expr; + } + if (is_struct_element_expr(expr)) { + if (!rewrite_struct_element_path_to_file_expr(expr, filter_mappings, global_to_file_slot, + rewrite_context)) { + // The scanner still evaluates the original table-level conjunct after TableReader + // finalizes the output block. Skipping an unlocalizable file conjunct is therefore + // safer than preparing a partially rewritten expression against the wrong struct + // layout. In particular, do not generate file-local conjuncts for computed complex + // parents such as `element_at(element_at(map_values(m), 1), 'field')`; only direct + // slot-rooted struct chains are supported here. + *can_localize = false; + } + return expr; + } + if (expr->is_slot_ref()) { + const auto* slot_ref = assert_cast(expr.get()); + const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref)); + if (rewrite_it != global_to_file_slot.end()) { + const auto& rewrite_info = rewrite_it->second; + auto file_slot = create_file_slot_ref(*slot_ref, rewrite_info, rewrite_context); + if (rewrite_info.file_type->equals(*rewrite_info.table_type)) { + return file_slot; + } + if (needs_complex_file_slot_cast(rewrite_info.file_type, rewrite_info.table_type)) { + // Generic file-local expressions cannot safely cast an evolved complex file slot + // back to the table type. Example: + // + // table filter: ARRAY_CONTAINS(MAP_KEYS(m), 'person5') + // old file: m MAP> + // table: m MAP> + // + // Although MAP_KEYS only reads the key column, wrapping the file slot as + // `CAST(file_m AS table_m)` forces the value struct cast first and fails because + // the old and new value structs have different fields. Keep such filters at the + // table level, where TableReader materializes the evolved complex value before + // Scanner evaluates the original conjunct. Direct slot-rooted struct child paths + // are handled by rewrite_struct_element_path_to_file_expr() above. + *can_localize = false; + return expr; + } + auto cast_expr = Cast::create_shared(rewrite_info.table_type); + cast_expr->add_child(std::move(file_slot)); + rewrite_context->add_created_expr(cast_expr); + return cast_expr; + } + return expr; + } + // The input is a split-local cloned tree. A previous split-local clone may already have + // inserted Cast(slot). Keep that rewrite idempotent: rewrite the cast child from table slot to + // the current split's file slot, and drop the cast when the current split no longer needs it. + if (is_cast_expr(expr) && expr->get_num_children() == 1) { + const auto& child = expr->children()[0]; + if (child->is_slot_ref()) { + const auto* slot_ref = assert_cast(child.get()); + const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref)); + if (rewrite_it != global_to_file_slot.end() && + expr->data_type()->equals(*rewrite_it->second.table_type)) { + auto rewritten_child = + create_file_slot_ref(*slot_ref, rewrite_it->second, rewrite_context); + if (rewrite_it->second.file_type->equals(*rewrite_it->second.table_type)) { + return rewritten_child; + } + if (needs_complex_file_slot_cast(rewrite_it->second.file_type, + rewrite_it->second.table_type)) { + *can_localize = false; + return expr; + } + expr->set_children({std::move(rewritten_child)}); + return expr; + } + } + } + + VExprSPtrs rewritten_children; + rewritten_children.reserve(expr->children().size()); + for (const auto& child : expr->children()) { + rewritten_children.push_back(rewrite_table_expr_to_file_expr( + child, global_to_file_slot, filter_mappings, rewrite_context, can_localize)); + } + expr->set_children(std::move(rewritten_children)); + return expr; +} + +static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id"; +static constexpr const char* ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER = "_last_updated_sequence_number"; +static constexpr int32_t ROW_LINEAGE_ROW_ID_FIELD_ID = 2147483540; +static constexpr int32_t ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER_FIELD_ID = 2147483539; + +static TableVirtualColumnType row_lineage_virtual_column_type(const std::string& column_name) { + if (column_name == ROW_LINEAGE_ROW_ID) { + return TableVirtualColumnType::ROW_ID; + } + if (column_name == ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER) { + return TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER; + } + return TableVirtualColumnType::INVALID; +} + +static TableVirtualColumnType row_lineage_virtual_column_type_by_field_id( + const ColumnDefinition& column) { + if (!column.has_identifier_field_id()) { + return TableVirtualColumnType::INVALID; + } + switch (column.get_identifier_field_id()) { + case ROW_LINEAGE_ROW_ID_FIELD_ID: + return TableVirtualColumnType::ROW_ID; + case ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER_FIELD_ID: + return TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER; + default: + return TableVirtualColumnType::INVALID; + } +} + +static TableVirtualColumnType row_lineage_virtual_column_type(const ColumnDefinition& column, + TableColumnMappingMode mode) { + switch (mode) { + case TableColumnMappingMode::BY_FIELD_ID: + return row_lineage_virtual_column_type_by_field_id(column); + case TableColumnMappingMode::BY_NAME: + case TableColumnMappingMode::BY_INDEX: + return row_lineage_virtual_column_type(column.name); + } + return TableVirtualColumnType::INVALID; +} + +// Returns true when the current file type is not the exact nested type the scan should expose. +// This is about building the projected file-side type/projection, not about whether TableReader +// later needs to rematerialize the complex value back to table layout. +static bool needs_projected_file_type_rebuild(const ColumnMapping& mapping) { + if (!is_complex_type(mapping.file_type->get_primitive_type())) { + return false; + } + if (mapping.child_mappings.empty()) { + return false; + } + DORIS_CHECK(mapping.file_type != nullptr); + DORIS_CHECK(mapping.table_type != nullptr); + if (remove_nullable(mapping.file_type)->get_primitive_type() != + remove_nullable(mapping.table_type)->get_primitive_type()) { + return true; + } + if (!mapping.table_type->equals(*mapping.file_type)) { + return true; + } + for (const auto& child_mapping : mapping.child_mappings) { + // Rename-only child mappings do not change the file-side projected shape. If field-id + // matching maps table child `renamed_b` to file child `b`, the file reader can still expose + // the original file type as long as child count/order/types are unchanged. + if (!child_mapping.file_local_id.has_value() || + needs_projected_file_type_rebuild(child_mapping)) { + return true; + } + } + return false; +} + +static std::optional file_child_ordinal_in_scan_type(const ColumnMapping& mapping, + const ColumnMapping& child_mapping) { + if (!child_mapping.file_local_id.has_value()) { + return std::nullopt; + } + const auto& file_children = !mapping.projected_file_children.empty() + ? mapping.projected_file_children + : mapping.original_file_children; + const auto child_it = std::ranges::find_if(file_children, [&](const ColumnDefinition& child) { + return child.file_local_id() == *child_mapping.file_local_id; + }); + if (child_it == file_children.end()) { + return std::nullopt; + } + return static_cast(std::distance(file_children.begin(), child_it)); +} + +static bool needs_complex_rematerialize(const ColumnMapping& mapping) { + if (mapping.child_mappings.empty()) { + return false; + } + if (mapping.table_type == nullptr || mapping.file_type == nullptr || + !mapping.table_type->equals(*mapping.file_type)) { + return true; + } + for (size_t table_child_idx = 0; table_child_idx < mapping.child_mappings.size(); + ++table_child_idx) { + const auto& child_mapping = mapping.child_mappings[table_child_idx]; + const auto file_child_idx = file_child_ordinal_in_scan_type(mapping, child_mapping); + if (!file_child_idx.has_value() || *file_child_idx != table_child_idx || + needs_complex_rematerialize(child_mapping) || + (child_mapping.table_type != nullptr && child_mapping.file_type != nullptr && + !child_mapping.table_type->equals(*child_mapping.file_type))) { + return true; + } + } + return false; +} + +static bool mapping_can_use_file_column_directly(const ColumnMapping& mapping) { + if (mapping.table_type == nullptr || mapping.file_type == nullptr) { + return false; + } + const auto table_type = remove_nullable(mapping.table_type); + const auto file_type = remove_nullable(mapping.file_type); + const bool same_timestamptz_with_different_scale = + table_type->get_primitive_type() == TYPE_TIMESTAMPTZ && + file_type->get_primitive_type() == TYPE_TIMESTAMPTZ; + if (!mapping.table_type->equals(*mapping.file_type) && !same_timestamptz_with_different_scale) { + return false; + } + return !needs_complex_rematerialize(mapping); +} + +static const ColumnDefinition* find_file_child_for_mapping(const ColumnDefinition& table_child, + const ColumnDefinition& file_parent, + TableColumnMappingMode mode, + size_t table_child_idx, + bool allow_ordinal_fallback) { + const auto file_parent_type = remove_nullable(file_parent.type)->get_primitive_type(); + switch (file_parent_type) { + case TYPE_ARRAY: + DORIS_CHECK(file_parent.children.size() == 1); + return &file_parent.children[0]; + case TYPE_MAP: + DORIS_CHECK(file_parent.children.size() == 2); + if (table_child.name == "key") { + return &file_parent.children[0]; + } + if (table_child.name == "value") { + return &file_parent.children[1]; + } + if (table_child.local_id == 0 || table_child.local_id == 1) { + return &file_parent.children[table_child.local_id]; + } + return nullptr; + default: + // Hive BY_INDEX is a top-level column matching rule. Once a complex root is selected by + // file position, nested struct children follow Hive reader's historical name matching + // semantics; their integer identifiers can be field ids, not file positions. + const auto nested_mode = + mode == TableColumnMappingMode::BY_INDEX ? TableColumnMappingMode::BY_NAME : mode; + if (const auto* file_child = + matcher_for_mode(nested_mode).find(table_child, file_parent.children); + file_child != nullptr) { + return file_child; + } + if (allow_ordinal_fallback && mode == TableColumnMappingMode::BY_FIELD_ID && + !table_child.has_identifier_field_id()) { + // Synthetic children are derived from the table DataType when nested ColumnDefinition + // metadata has been pruned away. They do not carry Iceberg field ids, so try a name + // match before falling back to ordinal order. Example: + // table value type: Struct(age, full_name, gender) + // old file value: Struct(name, age) + // Name matching keeps `age -> age`; the later unused-child fallback can then map the + // renamed `full_name -> name` instead of consuming `age` twice. + if (const auto* file_child = NameMatcher().find(table_child, file_parent.children); + file_child != nullptr) { + return file_child; + } + } + // Some callers only carry the full complex DataType for a projected table column, without + // expanded nested ColumnDefinitions. In that case we can still preserve full materialization + // by walking table/file struct fields by ordinal. This is a fallback only: explicit + // ColumnDefinition children keep using the requested table-format matching rule, which is + // required for precise schema evolution. + if (allow_ordinal_fallback && table_child_idx < file_parent.children.size()) { + return &file_parent.children[table_child_idx]; + } + return nullptr; + } +} + +static ColumnDefinition synthetic_child_definition(const std::string& name, DataTypePtr type, + int32_t local_id) { + ColumnDefinition child; + child.identifier = Field::create_field(name); + child.local_id = local_id; + child.name = name; + child.type = std::move(type); + return child; +} + +static std::vector synthesize_complex_children_from_type( + const DataTypePtr& type) { + std::vector children; + if (type == nullptr) { + return children; + } + const auto nested_type = remove_nullable(type); + switch (nested_type->get_primitive_type()) { + case TYPE_ARRAY: { + const auto* array_type = assert_cast(nested_type.get()); + children.push_back(synthetic_child_definition("element", array_type->get_nested_type(), 0)); + break; + } + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + children.push_back(synthetic_child_definition("key", map_type->get_key_type(), 0)); + children.push_back(synthetic_child_definition("value", map_type->get_value_type(), 1)); + break; + } + case TYPE_STRUCT: { + const auto* struct_type = assert_cast(nested_type.get()); + children.reserve(struct_type->get_elements().size()); + for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) { + children.push_back(synthetic_child_definition(struct_type->get_element_name(idx), + struct_type->get_element(idx), + cast_set(idx))); + } + break; + } + default: + break; + } + return children; +} + +static bool has_table_child_named(const std::vector& children, + std::string_view name) { + return std::ranges::any_of(children, [&](const ColumnDefinition& child) { + return std::string_view(child.name) == name; + }); +} + +static void complete_required_complex_children_from_type(const DataTypePtr& type, + std::vector* children) { + DORIS_CHECK(children != nullptr); + if (type == nullptr) { + return; + } + const auto nested_type = remove_nullable(type); + switch (nested_type->get_primitive_type()) { + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + // MAP key/value are structural children, not independently materializable table fields. + // A key-only projection can still be attached to a whole-map output root, for example: + // SELECT * FROM t WHERE ARRAY_CONTAINS(MAP_KEYS(new_map_column), 'person5') + // + // In that shape the scanner keeps the value stream readable, but the table projection can + // carry only the key child. Add the missing value child so recursive mapping can evolve the + // value type instead of letting TableReader cast old/new value structs directly. + if (has_table_child_named(*children, "key") && !has_table_child_named(*children, "value")) { + children->push_back(synthetic_child_definition("value", map_type->get_value_type(), 1)); + } + break; + } + case TYPE_ARRAY: + // ARRAY has only one required structural child (`element`), so a non-empty projection is + // already rooted at the element path. + break; + case TYPE_STRUCT: + // STRUCT children are real fields and must remain prunable. Completing missing struct + // fields here would turn `SELECT s.a` into a full-struct read and undo nested projection. + break; + default: + break; + } +} + +static Status validate_file_schema_children(const ColumnDefinition& file_field) { + if (file_field.type == nullptr) { + return Status::InternalError("File column '{}' has null type", file_field.name); + } + const auto nested_type = remove_nullable(file_field.type); + size_t expected_children = 0; + bool complex_with_fixed_children = true; + switch (nested_type->get_primitive_type()) { + case TYPE_ARRAY: + expected_children = 1; + break; + case TYPE_MAP: + expected_children = 2; + break; + case TYPE_STRUCT: + expected_children = + assert_cast(nested_type.get())->get_elements().size(); + break; + default: + complex_with_fixed_children = false; + break; + } + if (!complex_with_fixed_children || file_field.children.size() == expected_children) { + return Status::OK(); + } + return Status::InternalError( + "Malformed complex file schema for column '{}': type={}, expected_children={}, " + "actual_children={}", + file_field.name, file_field.type->get_name(), expected_children, + file_field.children.size()); +} + +static bool has_projected_file_children(const ColumnMapping& mapping) { + if (mapping.original_file_children.empty() || mapping.projected_file_children.empty()) { + return false; + } + if (mapping.original_file_children.size() != mapping.projected_file_children.size()) { + return true; + } + for (size_t idx = 0; idx < mapping.original_file_children.size(); ++idx) { + if (mapping.original_file_children[idx].file_local_id() != + mapping.projected_file_children[idx].file_local_id()) { + return true; + } + } + return false; +} + +static bool needs_nested_file_projection(const ColumnMapping& mapping) { + if (has_projected_file_children(mapping)) { + // Return True if the projected child column is missing / re-ordered + return true; + } + return std::ranges::any_of(mapping.child_mappings, [](const ColumnMapping& child_mapping) { + return needs_nested_file_projection(child_mapping); + }); +} + +static Status build_complex_projection(const ColumnMapping& mapping, LocalColumnIndex* projection); + +// Build the projected file children/type according to the pruned complex projection. For example, +// if we have a struct column `s` with children `id` and `name`, and the projection only keeps +// `s.name`, then the file reader should expose `STRUCT`. +static Status rebuild_projected_file_children_and_type( + const DataTypePtr& file_type, const std::vector& original_file_children, + const std::vector& child_mappings, + std::vector* projected_file_children, DataTypePtr* projected_type) { + DORIS_CHECK(file_type != nullptr); + DORIS_CHECK(projected_file_children != nullptr); + DORIS_CHECK(projected_type != nullptr); + ColumnDefinition field; + field.type = file_type; + field.children = original_file_children; + LocalColumnIndex projection = LocalColumnIndex::partial_local(-1); + projection.children.reserve(child_mappings.size()); + for (const auto* child_mapping : present_child_mappings_in_file_order(child_mappings)) { + DORIS_CHECK(child_mapping->file_local_id.has_value()); + LocalColumnIndex child_projection; + RETURN_IF_ERROR(build_complex_projection(*child_mapping, &child_projection)); + projection.children.push_back(std::move(child_projection)); + } + + ColumnDefinition projected_field; + RETURN_IF_ERROR(project_column_definition(field, projection, &projected_field)); + *projected_file_children = std::move(projected_field.children); + *projected_type = std::move(projected_field.type); + return Status::OK(); +} + +// Build the complex column projection according to the ColumnMapping which is re-ordered by the +// file-schema's order. +// +// For MAP, a partial projection represents value-subtree pruning only. The key child is not a +// projected output shape; file readers still read full keys to construct ColumnMap offsets and keep +// key semantics unchanged. If a caller tries to project only/prune the key child, the common schema +// projection helper rejects it. +static Status build_complex_projection(const ColumnMapping& mapping, LocalColumnIndex* projection) { + if (projection == nullptr) { + return Status::InvalidArgument("projection is null"); + } + DORIS_CHECK(mapping.file_local_id.has_value()); + *projection = LocalColumnIndex::local(*mapping.file_local_id); + projection->project_all_children = mapping.child_mappings.empty(); + projection->children.clear(); + const auto present_children = present_child_mappings_in_file_order(mapping.child_mappings); + if (!projection->project_all_children && present_children.empty()) { + // All requested table children under this complex node are missing/default-only. The file + // reader cannot expose an empty complex projection, but TableReader can still rematerialize + // the table shape from a full file subtree and fill the missing children with defaults. + projection->project_all_children = true; + return Status::OK(); + } + for (const auto* child_mapping : present_children) { + LocalColumnIndex child_projection; + RETURN_IF_ERROR(build_complex_projection(*child_mapping, &child_projection)); + projection->children.push_back(std::move(child_projection)); + } + if (!projection->project_all_children && projection->children.empty()) { + return Status::NotSupported("Projection for complex column {} contains no file children", + mapping.file_column_name); + } + return Status::OK(); +} + +using FilterProjectionMap = std::map; + +// Update the mapping's file type according to the projection, and determine whether the projection +// is trivial (i.e. the projected file type is the same as the table type, so no need to +// rematerialize the complex value back to table layout after reading from file). +static Status apply_projection_to_mapping_file_type(const LocalColumnIndex& projection, + ColumnMapping* mapping) { + DORIS_CHECK(mapping != nullptr); + if (mapping->original_file_type == nullptr) { + mapping->original_file_type = mapping->file_type; + } + if (mapping->original_file_type == nullptr || + !is_complex_type(remove_nullable(mapping->original_file_type)->get_primitive_type())) { + return Status::OK(); + } + ColumnDefinition field; + field.type = mapping->original_file_type; + field.children = mapping->original_file_children; + ColumnDefinition projected_field; + RETURN_IF_ERROR(project_column_definition(field, projection, &projected_field)); + mapping->file_type = std::move(projected_field.type); + mapping->projected_file_children = std::move(projected_field.children); + mapping->is_trivial = mapping_can_use_file_column_directly(*mapping); + return Status::OK(); +} + +static Status merge_filter_projection(const FilterProjectionMap* filter_projections, + LocalColumnIndex* projection) { + DORIS_CHECK(projection != nullptr); + if (filter_projections == nullptr) { + return Status::OK(); + } + const auto filter_projection_it = filter_projections->find(projection->column_id()); + if (filter_projection_it == filter_projections->end()) { + return Status::OK(); + } + // Merge predicate-only nested paths into the root projection that is about to be scanned. + // Example: `SELECT s.a WHERE s.b > 1` first builds the output projection `s -> a` from + // ColumnMapping, while build_nested_struct_filter_projection_map() records `s -> b`. This merge + // produces one file scan projection `s -> a,b`. + RETURN_IF_ERROR(merge_local_column_index(projection, filter_projection_it->second)); + return Status::OK(); +} + +static bool table_root_is_map(const ColumnMapping& mapping) { + if (mapping.table_type == nullptr) { + return false; + } + return remove_nullable(mapping.table_type)->get_primitive_type() == TYPE_MAP; +} + +static Status add_scan_column(FileScanRequest* file_request, ColumnMapping* mapping, + bool is_predicate_column, bool force_full_complex_scan_projection, + const FilterProjectionMap* filter_projections = nullptr) { + const auto file_column_id = LocalColumnId(mapping->file_local_id.value()); + LocalColumnIndex projection = LocalColumnIndex::top_level(file_column_id); + // Columnar readers can turn a complex mapping into a nested file projection, but + // row-oriented readers must scan the full top-level complex field because all children are + // encoded in the same text cell. + if (!force_full_complex_scan_projection && needs_nested_file_projection(*mapping)) { + RETURN_IF_ERROR(build_complex_projection(*mapping, &projection)); + } + if (is_predicate_column && !force_full_complex_scan_projection) { + DCHECK(filter_projections != nullptr); + // If a projected complex root is also used by a predicate, rebuild the predicate scan + // projection from the output mapping before merging predicate-only children. For + // `SELECT s.a WHERE s.b > 1`, build_complex_projection() produces `s -> a` and + // merge_filter_projection() adds `s -> b`, so the predicate column reads both children. + RETURN_IF_ERROR(merge_filter_projection(filter_projections, &projection)); + } + FileScanRequestBuilder builder(file_request); + if (is_predicate_column) { + return builder.add_predicate_column(std::move(projection)); + } + return builder.add_non_predicate_column(std::move(projection)); +} + +static const LocalColumnIndex* find_scan_projection( + const std::vector& scan_columns, LocalColumnId file_column_id) { + const auto projection_it = + std::ranges::find_if(scan_columns, [&](const LocalColumnIndex& projection) { + return projection.column_id() == file_column_id; + }); + return projection_it == scan_columns.end() ? nullptr : &*projection_it; +} + +// Apply the final scan projection of one root file column back to its ColumnMapping. This updates +// mapping.file_type/projected_file_children from the original file schema to the exact shape that +// FileReader will return. +// +// Example: for `SELECT s.a WHERE s.b > 1`, add_scan_column() keeps only one predicate scan +// projection `s -> a,b`. Applying that projection changes the mapping's file type from the full +// file struct `s` to the projected file struct `s`, so later filter rewrite and +// TableReader final materialization use the same column shape as the file-local block. +static Status apply_scan_projection_to_mapping_file_type(const FileScanRequest& file_request, + ColumnMapping* mapping) { + DORIS_CHECK(mapping != nullptr); + DORIS_CHECK(mapping->file_local_id.has_value()); + const auto file_column_id = LocalColumnId(*mapping->file_local_id); + // Predicate columns are the actual scan projection when a column is used by row-level filters: + // add_scan_column() removes the duplicate non-predicate projection in that case. + const auto* projection = find_scan_projection(file_request.predicate_columns, file_column_id); + if (projection == nullptr) { + projection = find_scan_projection(file_request.non_predicate_columns, file_column_id); + } + DORIS_CHECK(projection != nullptr); + return apply_projection_to_mapping_file_type(*projection, mapping); +} + +// Build extra scan projections required only by row-level filters on nested struct children. +// +// Example: for `SELECT s.a FROM t WHERE s.b.c > 1`, the output projection may only contain `s.a`, +// but the file reader must also read `s.b.c` to evaluate the predicate. This function collects the +// table-side filter path, resolves it through ColumnMapping first, and records the corresponding +// file-side projection in filter_projections. This keeps renamed fields consistent across the scan +// projection, row-level conjunct rewrite, and nested predicate pruning. Example: +// table filter path: s -> renamed_b -> c +// old file path: s -> b -> c +// recorded path: s -> b -> c +// When add_scan_column() adds the same root as a predicate column, it rebuilds that root from the +// output mapping, merges this filter-only projection into it, and removes the duplicate +// non-predicate root entry. +static Status build_nested_struct_filter_projection_map( + const std::vector& table_filters, const std::vector& mappings, + FilterProjectionMap* filter_projections) { + DORIS_CHECK(filter_projections != nullptr); + filter_projections->clear(); + for (const auto& table_filter : table_filters) { + if (table_filter.conjunct == nullptr) { + continue; + } + // Collect all nested struct paths in the table filter. For example, for + // `s.id > 5 AND element_at(s, 'renamed_name') = 'abc'`, collect the table paths + // `s -> id` and `s -> renamed_name`, then resolve each one to its file-side projection. + std::vector paths; + collect_nested_struct_paths(table_filter.conjunct->root(), &paths); + for (const auto& path : paths) { + auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) { + return mapping.global_index == path.root_global_index; + }); + if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value() || + path.selectors.empty()) { + continue; + } + + ResolvedNestedStructPath resolved; + LocalColumnIndex root_projection; + if (!resolve_nested_struct_path_for_file(path, mappings, &resolved)) { + if (!table_root_is_map(*mapping_it)) { + continue; + } + // Direct map value filters such as `m.value.a > 1` need the value leaf for row + // evaluation even when the query only projects another value child. This is only a + // scan projection fallback; complex map/array expressions are still not rewritten + // into file-local conjuncts. + LocalColumnIndex child_projection; + RETURN_IF_ERROR(build_file_child_projection_from_schema( + mapping_it->original_file_children, path.selectors, &child_projection)); + if (child_projection.local_id() < 0) { + continue; + } + root_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id); + root_projection.children.push_back(std::move(child_projection)); + } else { + root_projection = std::move(resolved.file_projection); + } + auto filter_projection_it = filter_projections->find(root_projection.column_id()); + if (filter_projection_it == filter_projections->end()) { + filter_projections->emplace(root_projection.column_id(), + std::move(root_projection)); + continue; + } + RETURN_IF_ERROR( + merge_local_column_index(&filter_projection_it->second, root_projection)); + } + } + return Status::OK(); +} + +static void rebuild_projection(ColumnMapping* mapping, LocalIndex block_position) { + DORIS_CHECK(mapping->file_local_id.has_value()); + if (mapping->is_trivial || needs_complex_rematerialize(*mapping)) { + mapping->projection = VExprContext::create_shared(VSlotRef::create_shared( + cast_set(block_position.value()), cast_set(block_position.value()), -1, + mapping->file_type, mapping->file_column_name)); + return; + } + + auto expr = Cast::create_shared(mapping->table_type); + expr->add_child(VSlotRef::create_shared(cast_set(block_position.value()), + cast_set(block_position.value()), -1, + mapping->file_type, mapping->file_column_name)); + mapping->projection = VExprContext::create_shared(expr); +} + +// Build file slot rewrite info from the localized filter targets. Only local targets can enter +// file-reader expressions; constant and unset targets stay above the file reader. +static std::map build_file_slot_rewrite_map( + const std::vector& mappings, + const std::map& filter_entries) { + std::map global_to_file_slot; + for (const auto& mapping : mappings) { + const auto entry_it = filter_entries.find(mapping.global_index); + if (entry_it == filter_entries.end() || !entry_it->second.is_local()) { + continue; + } + DORIS_CHECK(mapping.file_local_id.has_value()); + global_to_file_slot.emplace( + mapping.global_index, + FileSlotRewriteInfo {.block_position = entry_it->second.local_index().value(), + .file_type = mapping.file_type, + .table_type = mapping.table_type, + .file_column_name = mapping.file_column_name}); + } + return global_to_file_slot; +} + +Status TableColumnMapper::_create_by_index_mapping(const ColumnDefinition& table_column, + const std::vector& file_schema, + ColumnMapping* mapping) { + DORIS_CHECK(mapping != nullptr); + DORIS_CHECK(!table_column.is_partition_key); + + // Key contract: in BY_INDEX mode, `ColumnDefinition::identifier` TYPE_INT is interpreted as the + // 0-based position of this column inside `file_schema`. FE writes the physical file position + // of each non-partition projected column into that identifier. This interpretation allows: + // - sparse projection: read only a subset of file columns (for example only `_col2` + // and `_col4`); + // - column reordering: table column order differs from file column order; + // - no many-to-one mapping: FE must guarantee that each file position is referenced by at + // most one table column. + const auto file_index = table_column.get_identifier_position(); + + // Case A: file_index is in range, so build a direct positional mapping. + // The file column name (for example `_col0`) is intentionally ignored here. + if (file_index >= 0 && static_cast(file_index) < file_schema.size()) { + return _create_direct_mapping(table_column, file_schema[static_cast(file_index)], + mapping); + } + + // Case B: file_index is out of range, which means the file does not contain this column. + // Route it through the missing-column path used by schema evolution. + if (table_column.default_expr != nullptr) { + _set_constant_mapping(mapping, table_column.default_expr); + return Status::OK(); + } + // Keep the mapping empty (`file_local_id` remains `nullopt`) and let the upper finalize + // stage fill NULL/default values. + return Status::OK(); +} + +void TableColumnMapper::_set_constant_mapping(ColumnMapping* mapping, VExprContextSPtr expr) { + DORIS_CHECK(mapping != nullptr); + DORIS_CHECK(expr != nullptr); + mapping->default_expr = std::move(expr); + mapping->constant_index = _constant_map.add(ConstantEntry { + .global_index = mapping->global_index, + .expr = mapping->default_expr, + .type = mapping->table_type, + }); + mapping->filter_conversion = FilterConversionType::CONSTANT; +} + +Status TableColumnMapper::_create_mapping_for_column(const ColumnDefinition& table_column, + GlobalIndex global_index, + ColumnMapping* mapping) { + DORIS_CHECK(mapping != nullptr); + *mapping = ColumnMapping {}; + mapping->global_index = global_index; + mapping->table_column_name = table_column.name; + mapping->table_type = table_column.type; + const auto row_lineage_type = row_lineage_virtual_column_type(table_column, _options.mode); + if (const auto* partition_value = find_partition_value(table_column, _partition_values); + table_column.is_partition_key && partition_value != nullptr) { + // Partition values are split constants and must take precedence over defaults. + _set_constant_mapping(mapping, VExprContext::create_shared(VLiteral::create_shared( + mapping->table_type, *partition_value))); + } else if (_options.mode == TableColumnMappingMode::BY_INDEX && + !table_column.is_partition_key && table_column.has_identifier_field_id()) { + // BY_INDEX interprets ColumnDefinition::identifier as physical file position. + RETURN_IF_ERROR(_create_by_index_mapping(table_column, _file_schema, mapping)); + } else if (const auto* file_field = _find_file_field(table_column, _file_schema)) { + // Normal physical file column mapping. + RETURN_IF_ERROR(_create_direct_mapping(table_column, *file_field, mapping)); + if (row_lineage_type != TableVirtualColumnType::INVALID) { + // Iceberg v3 rewritten files may physically contain row lineage metadata fields. + // File non-null values must be preserved, while file NULLs still inherit from data file + // metadata in IcebergTableReader. Therefore the mapping has a real file source plus a + // virtual post-materialization step, and filters must wait for finalize output. + mapping->virtual_column_type = row_lineage_type; + mapping->filter_conversion = FilterConversionType::FINALIZE_ONLY; + } + } else if (row_lineage_type != TableVirtualColumnType::INVALID) { + // Iceberg row lineage metadata fields are optional in data files. Missing fields are exposed + // as all-NULL table columns first; IcebergTableReader fills inherited values only when the + // split carries first_row_id / last_updated_sequence_number metadata. + // FE may attach a default_expr to these hidden metadata columns, but the Iceberg v3 + // inheritance rule must take precedence over the generic missing-column default path. + mapping->virtual_column_type = row_lineage_type; + } else if (table_column.name == BeConsts::ICEBERG_ROWID_COL) { + // Doris internal Iceberg row locator is never a physical Iceberg data column. It is built + // from file path, row position and partition metadata for delete/update/merge. + mapping->virtual_column_type = TableVirtualColumnType::ICEBERG_ROWID; + } else if (table_column.default_expr != nullptr) { + // Missing schema-evolution column with an explicit default expression. + _set_constant_mapping(mapping, table_column.default_expr); + } else { + if (table_column.is_partition_key) { + return Status::InvalidArgument( + "Table column '{}' (global_index={}) does not have a matching partition value", + table_column.name, mapping->global_index.value()); + } + } + return Status::OK(); +} + +Status TableColumnMapper::_create_hidden_filter_mapping(const ColumnDefinition& table_column, + GlobalIndex global_index, + ColumnMapping* mapping) { + auto status = _create_mapping_for_column(table_column, global_index, mapping); + if (mapping->file_local_id.has_value() || mapping->constant_index.has_value() || + mapping->virtual_column_type != TableVirtualColumnType::INVALID) { + return Status::OK(); + } + if (_options.mode == TableColumnMappingMode::BY_NAME) { + return status; + } + + // Predicate-only slot refs carry the table name/type but do not carry the table-format field + // id used by BY_FIELD_ID or the file position used by BY_INDEX. Use a name fallback only for + // hidden filter localization; projected columns still obey the requested mapping mode. + const auto* file_field = + matcher_for_mode(TableColumnMappingMode::BY_NAME).find(table_column, _file_schema); + if (file_field == nullptr) { + return status; + } + ColumnMapping fallback_mapping; + fallback_mapping.global_index = global_index; + fallback_mapping.table_column_name = table_column.name; + fallback_mapping.table_type = table_column.type; + RETURN_IF_ERROR(_create_direct_mapping(table_column, *file_field, &fallback_mapping)); + *mapping = std::move(fallback_mapping); + return Status::OK(); +} + +Status TableColumnMapper::_build_hidden_filter_mappings( + const std::vector& table_filters) { + _hidden_mappings.clear(); + + std::map filter_columns; + for (const auto& table_filter : table_filters) { + if (table_filter.conjunct != nullptr) { + collect_top_level_slot_columns(table_filter.conjunct->root(), &filter_columns); + } + } + + // TableColumnPredicates only carry GlobalIndex and predicate objects. They do not provide the + // top-level column name/type needed to build a hidden mapping, so a predicate-only column can + // be hidden-mapped only when the same root slot also appears in a conjunct. + for (const auto& [global_index, table_column] : filter_columns) { + if (_find_mapping(global_index) != nullptr) { + // Ignore columns that are already mapped by the projected columns + continue; + } + ColumnMapping mapping; + RETURN_IF_ERROR(_create_hidden_filter_mapping(table_column, global_index, &mapping)); + if (mapping.file_local_id.has_value() || mapping.constant_index.has_value() || + mapping.virtual_column_type != TableVirtualColumnType::INVALID) { + _hidden_mappings.push_back(std::move(mapping)); + } + } + return Status::OK(); +} + +Status TableColumnMapper::create_mapping(const std::vector& projected_columns, + const std::map& partition_values, + const std::vector& file_schema) { + clear(); + _partition_values = partition_values; + _file_schema = file_schema; + for (size_t column_idx = 0; column_idx < projected_columns.size(); ++column_idx) { + ColumnMapping mapping; + RETURN_IF_ERROR(_create_mapping_for_column(projected_columns[column_idx], + GlobalIndex(column_idx), &mapping)); + _mappings.push_back(std::move(mapping)); + } + return Status::OK(); +} + +std::vector TableColumnMapper::_filter_visible_mappings() const { + std::vector mappings; + mappings.reserve(_mappings.size() + _hidden_mappings.size()); + mappings.insert(mappings.end(), _mappings.begin(), _mappings.end()); + mappings.insert(mappings.end(), _hidden_mappings.begin(), _hidden_mappings.end()); + return mappings; +} + +Status TableColumnMapper::_build_filter_entries(const FileScanRequest& file_request) { + _filter_entries.clear(); + const auto mappings = _filter_visible_mappings(); + for (const auto& mapping : mappings) { + FilterEntry entry; + if (mapping.constant_index.has_value()) { + entry = FilterEntry::constant(*mapping.constant_index); + } else if (mapping.file_local_id.has_value() && + filter_conversion_has_local_source(mapping.filter_conversion)) { + const auto local_position_it = + file_request.local_positions.find(LocalColumnId(*mapping.file_local_id)); + if (local_position_it != file_request.local_positions.end()) { + entry = FilterEntry::local(local_position_it->second); + } + } + _filter_entries.emplace(mapping.global_index, entry); + } + return Status::OK(); +} + +Status TableColumnMapper::create_scan_request( + const std::vector& table_filters, + const TableColumnPredicates& table_column_predicates, + const std::vector& projected_columns, FileScanRequest* file_request, + RuntimeState* runtime_state) { + // FileReader evaluates expressions against a file-local block. This mapper owns the + // table-column to file-column conversion, so it also owns the file-local block positions. + file_request->predicate_columns.clear(); + file_request->non_predicate_columns.clear(); + file_request->local_positions.clear(); + file_request->conjuncts.clear(); + file_request->delete_conjuncts.clear(); + file_request->column_predicate_filters.clear(); + _filter_entries.clear(); + // 1. Build referenced non-predicate columns + for (size_t column_idx = 0; column_idx < projected_columns.size(); ++column_idx) { + const auto global_index = GlobalIndex(column_idx); + auto* mapping = _find_mapping(global_index); + if (mapping != nullptr && mapping->file_local_id.has_value()) { + // A file column can be read lazily as a non-predicate column only when it is not used + // by row-level expression filters. Single-column ColumnPredicate filters are pruning + // hints only and must not force row-level predicate materialization. + bool used_by_filter = false; + for (const auto& table_filter : table_filters) { + const auto& global_indices = table_filter.global_indices; + if (std::find(global_indices.begin(), global_indices.end(), global_index) != + global_indices.end() && + filter_conversion_has_local_source(mapping->filter_conversion)) { + used_by_filter = true; + break; + } + } + if (!used_by_filter || !enable_lazy_materialization()) { + RETURN_IF_ERROR(add_scan_column(file_request, mapping, false, + force_full_complex_scan_projection())); + } + } + } + // 2. Build referenced predicate columns + // Hidden filter mappings must be built before localizing filters, so that they can be localized together with visible mappings and referenced by localized filter expressions. + RETURN_IF_ERROR(_build_hidden_filter_mappings(table_filters)); + RETURN_IF_ERROR( + localize_filters(table_filters, table_column_predicates, file_request, runtime_state)); + // 3. Rebuild output projection expressions for projected columns. localize_filters() has + // already applied the final scan projection to mapping.file_type/projected_file_children before + // rewriting filter expressions. + for (auto& mapping : _mappings) { + if (!mapping.file_local_id.has_value()) { + continue; + } + auto position_it = + file_request->local_positions.find(LocalColumnId(*mapping.file_local_id)); + DORIS_CHECK(position_it != file_request->local_positions.end()) + << file_request->local_positions.size() << " " << *mapping.file_local_id << " " + << mapping.file_column_name; + rebuild_projection(&mapping, position_it->second); + } + return Status::OK(); +} + +ColumnMapping* TableColumnMapper::_find_mapping(GlobalIndex global_index) { + for (auto& mapping : _mappings) { + if (mapping.global_index == global_index) { + return &mapping; + } + } + return nullptr; +} + +ColumnMapping* TableColumnMapper::_find_filter_mapping(GlobalIndex global_index) { + if (auto* mapping = _find_mapping(global_index); mapping != nullptr) { + return mapping; + } + for (auto& mapping : _hidden_mappings) { + if (mapping.global_index == global_index) { + return &mapping; + } + } + return nullptr; +} + +Status TableColumnMapper::localize_filters(const std::vector& table_filters, + const TableColumnPredicates& table_column_predicates, + FileScanRequest* file_request, + RuntimeState* runtime_state) { + FilterProjectionMap filter_projections; + auto filter_mappings = _filter_visible_mappings(); + RETURN_IF_ERROR(build_nested_struct_filter_projection_map(table_filters, filter_mappings, + &filter_projections)); + for (const auto& table_filter : table_filters) { + for (const auto& global_index : table_filter.global_indices) { + auto* mapping = _find_filter_mapping(global_index); + if (mapping == nullptr || !mapping->file_local_id.has_value() || + !filter_conversion_has_local_source(mapping->filter_conversion)) { + continue; + } + RETURN_IF_ERROR(add_scan_column(file_request, mapping, enable_lazy_materialization(), + force_full_complex_scan_projection(), + &filter_projections)); + } + } + // Rebuild the file type for every scan-local mapping before expression rewrite. Predicate-only + // hidden mappings must see the same projected file type as the file reader will produce. + for (auto& mapping : _mappings) { + if (mapping.file_local_id.has_value() && + file_request->local_positions.contains(LocalColumnId(*mapping.file_local_id))) { + RETURN_IF_ERROR(apply_scan_projection_to_mapping_file_type(*file_request, &mapping)); + } + } + for (auto& mapping : _hidden_mappings) { + if (mapping.file_local_id.has_value() && + file_request->local_positions.contains(LocalColumnId(*mapping.file_local_id))) { + RETURN_IF_ERROR(apply_scan_projection_to_mapping_file_type(*file_request, &mapping)); + } + } + RETURN_IF_ERROR(_build_filter_entries(*file_request)); + + // Build the complete table-slot rewrite map after all predicate columns have been assigned. + // This keeps expression localization independent from filter iteration order. + filter_mappings = _filter_visible_mappings(); + const auto global_to_file_slot = build_file_slot_rewrite_map(filter_mappings, _filter_entries); + for (const auto& table_filter : table_filters) { + if (table_filter.conjunct != nullptr && + table_filter_has_only_local_entries(table_filter, _filter_entries)) { + RewriteContext rewrite_context {.runtime_state = runtime_state}; + VExprSPtr rewrite_root; + Status clone_status; + try { + clone_status = clone_table_expr_tree(table_filter.conjunct->root(), &rewrite_root); + } catch (const Exception& e) { + // Some table filters contain complex intermediate values, for example + // `element_at(MAP_VALUES(m)[1], 'age') > 30`. The current file-local rewrite only + // understands top-level slots and struct-element paths rooted at top-level slots; + // cloning such expressions can hit the generic TExpr complex-type limitation. + // Leave them above TableReader, where Scanner evaluates the original table-level + // conjunct after final materialization. +#ifndef NDEBUG + return Status::InternalError( + "Failed to clone table filter for file-local rewrite: {}, expr={}", + e.to_string(), table_filter.conjunct->root()->debug_string()); +#else + continue; +#endif + } catch (const std::exception& e) { +#ifndef NDEBUG + return Status::InternalError( + "Failed to clone table filter for file-local rewrite: {}, expr={}", + e.what(), table_filter.conjunct->root()->debug_string()); +#else + continue; +#endif + } + if (!clone_status.ok()) { +#ifndef NDEBUG + return Status::InternalError( + "Failed to clone table filter for file-local rewrite: {}, expr={}", + clone_status.to_string(), table_filter.conjunct->root()->debug_string()); +#else + continue; +#endif + } + bool can_localize = true; + auto localized_root = rewrite_table_expr_to_file_expr(rewrite_root, global_to_file_slot, + filter_mappings, &rewrite_context, + &can_localize); + if (!can_localize) { + continue; + } + auto localized_conjunct = VExprContext::create_shared(std::move(localized_root)); + RETURN_IF_ERROR(rewrite_context.prepare_created_exprs(localized_conjunct.get())); + file_request->conjuncts.push_back(std::move(localized_conjunct)); + } + } + if (enable_column_predicate_filters()) { + for (const auto& [global_index, predicates] : table_column_predicates) { + const auto* mapping = _find_filter_mapping(global_index); + const auto entry_it = _filter_entries.find(global_index); + if (mapping == nullptr || !mapping->file_local_id.has_value() || predicates.empty() || + entry_it == _filter_entries.end() || !entry_it->second.is_local() || + !column_predicate_can_use_local_source(mapping->filter_conversion) || + mapping->file_type == nullptr) { + continue; + } + FileColumnPredicateFilter column_predicate_filter; + column_predicate_filter.file_column_id = LocalColumnId(*mapping->file_local_id); + column_predicate_filter.target = + FileNestedPredicateTarget(column_predicate_filter.file_column_id); + const auto file_primitive_type = + remove_nullable(mapping->file_type)->get_primitive_type(); + for (const auto& predicate : predicates) { + DORIS_CHECK(predicate != nullptr); + if (predicate->primitive_type() == file_primitive_type) { + column_predicate_filter.predicates.push_back(predicate); + } + } + if (column_predicate_filter.predicates.empty()) { + continue; + } + file_request->column_predicate_filters.push_back(std::move(column_predicate_filter)); + } + for (const auto& table_filter : table_filters) { + if (table_filter.conjunct == nullptr || + !table_filter_has_only_local_entries(table_filter, _filter_entries)) { + continue; + } + std::vector nested_column_predicate_filters; + collect_nested_column_predicate_filters(table_filter.conjunct->root(), filter_mappings, + &nested_column_predicate_filters); + for (auto& column_predicate_filter : nested_column_predicate_filters) { + merge_column_predicate_filter(std::move(column_predicate_filter), + &file_request->column_predicate_filters); + } + } + } + return Status::OK(); +} + +const ColumnDefinition* TableColumnMapper::_find_file_field( + const ColumnDefinition& table_column, + const std::vector& file_schema) const { + if (table_column.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) { + const auto field_it = std::ranges::find_if(file_schema, [](const ColumnDefinition& field) { + return field.column_type == ColumnType::GLOBAL_ROWID; + }); + return field_it == file_schema.end() ? nullptr : &*field_it; + } + return matcher_for_mode(_options.mode).find(table_column, file_schema); +} + +Status TableColumnMapper::_create_direct_mapping(const ColumnDefinition& table_column, + const ColumnDefinition& file_field, + ColumnMapping* mapping) const { + DORIS_CHECK(mapping != nullptr); + DORIS_CHECK(file_field.local_id >= 0 || file_field.local_id == GLOBAL_ROWID_COLUMN_ID); + mapping->file_local_id = file_field.local_id; + mapping->table_column_name = table_column.name; + mapping->file_column_name = file_field.name; + mapping->original_file_type = file_field.type; + mapping->original_file_children = file_field.children; + mapping->projected_file_children = file_field.children; + mapping->file_type = file_field.type; + mapping->is_trivial = mapping_can_use_file_column_directly(*mapping); + mapping->filter_conversion = mapping->is_trivial ? FilterConversionType::COPY_DIRECTLY + : FilterConversionType::CAST_FILTER; + mapping->child_mappings.clear(); + + auto table_children = table_column.children; + const auto nested_table_type = remove_nullable(mapping->table_type); + // Some scan paths, especially SELECT *, only carry the complete complex DataType for a table + // column and leave ColumnDefinition::children empty. If the file type is an older complex + // schema, treating this as a leaf mapping would make TableReader fall back to a plain CAST. + // That is invalid for evolved structs with different field counts. + // + // Example: + // table column type: Map(String, Struct(age, full_name, gender)) + // old file type: Map(String, Struct(age, name)) + // table children: empty + // + // Synthesize key/value/struct-field children from the table type so the normal recursive + // mapping path can rematerialize `name -> full_name` and fill missing `gender` with defaults, + // instead of trying to CAST Struct(age, name) to Struct(age, full_name, gender). + const bool synthesized_table_children = + table_children.empty() && is_complex_type(nested_table_type->get_primitive_type()) && + !mapping->table_type->equals(*mapping->file_type); + if (synthesized_table_children) { + table_children = synthesize_complex_children_from_type(mapping->table_type); + } else if (!table_children.empty() && !mapping->table_type->equals(*mapping->file_type)) { + complete_required_complex_children_from_type(mapping->table_type, &table_children); + } + + if (!table_children.empty()) { + if (!is_complex_type(remove_nullable(mapping->file_type)->get_primitive_type())) { + return Status::NotSupported( + "Cannot map complex table column '{}' to scalar parquet column '{}', table " + "type={}, file type={}", + table_column.name, file_field.name, mapping->table_type->get_name(), + mapping->file_type->get_name()); + } + RETURN_IF_ERROR(validate_file_schema_children(file_field)); + std::vector synthesized_used_file_child_ids; + for (size_t table_child_idx = 0; table_child_idx < table_children.size(); + ++table_child_idx) { + const auto& table_child = table_children[table_child_idx]; + const auto* file_child = + find_file_child_for_mapping(table_child, file_field, _options.mode, + table_child_idx, synthesized_table_children); + if (synthesized_table_children && file_child != nullptr) { + const auto file_child_id = file_child->file_local_id(); + if (std::ranges::find(synthesized_used_file_child_ids, file_child_id) != + synthesized_used_file_child_ids.end()) { + file_child = nullptr; + for (const auto& candidate : file_field.children) { + const auto candidate_id = candidate.file_local_id(); + if (std::ranges::find(synthesized_used_file_child_ids, candidate_id) == + synthesized_used_file_child_ids.end()) { + file_child = &candidate; + break; + } + } + } + if (file_child != nullptr) { + synthesized_used_file_child_ids.push_back(file_child->file_local_id()); + } + } + if (file_child == nullptr) { + ColumnMapping child_mapping; + child_mapping.table_column_name = table_child.name; + child_mapping.file_column_name = table_child.name; + child_mapping.table_type = table_child.type; + child_mapping.file_type = table_child.type; + child_mapping.filter_conversion = FilterConversionType::FINALIZE_ONLY; + mapping->child_mappings.push_back(std::move(child_mapping)); + continue; + } + ColumnMapping child_mapping; + child_mapping.table_column_name = table_child.name; + child_mapping.table_type = table_child.type; + RETURN_IF_ERROR(_create_direct_mapping(table_child, *file_child, &child_mapping)); + mapping->child_mappings.push_back(std::move(child_mapping)); + } + if (needs_projected_file_type_rebuild(*mapping)) { + // If complex projection prunes some children, we have to rebuild the projected file type to make sure the reader expression can find the correct child types by name. + RETURN_IF_ERROR(rebuild_projected_file_children_and_type( + mapping->file_type, mapping->original_file_children, mapping->child_mappings, + &mapping->projected_file_children, &mapping->file_type)); + DCHECK(mapping->table_type != nullptr); + mapping->is_trivial = mapping_can_use_file_column_directly(*mapping); + // TODO: ? READER_EXPRESSION + mapping->filter_conversion = mapping->is_trivial + ? FilterConversionType::COPY_DIRECTLY + : FilterConversionType::READER_EXPRESSION; + } + } + return Status::OK(); +} + +} // namespace doris::format diff --git a/be/src/format_v2/column_mapper.h b/be/src/format_v2/column_mapper.h new file mode 100644 index 00000000000000..2ffbbbb9414d83 --- /dev/null +++ b/be/src/format_v2/column_mapper.h @@ -0,0 +1,294 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "core/field.h" +#include "exprs/vexpr_fwd.h" +#include "format_v2/file_reader.h" + +namespace doris { +class ColumnPredicate; +class RuntimeState; +} // namespace doris + +namespace doris::format { + +struct ColumnDefinition; +struct TableFilter; + +// Table-level simple predicates grouped by table/global output position. The key is not +// LocalColumnId: TableColumnMapper resolves it through ColumnMapping before creating file pruning +// hints. +using TableColumnPredicates = std::map>>; + +enum class TableColumnMappingMode { + // Match by ColumnDefinition::identifier TYPE_INT as field id. + BY_FIELD_ID, + // Match by ColumnDefinition::identifier TYPE_STRING, or logical name when identifier is null. + BY_NAME, + // Match top-level columns by file position. This mainly serves Hive1 ORC style files whose + // column names are placeholder values such as `_col0` / `_col1`, where position is the only + // reliable way to select the correct column. + BY_INDEX, +}; + +enum TableVirtualColumnType { + INVALID = 0, // not a virtual column + // Iceberg v3 row lineage metadata column `_row_id`. Physical non-null values + // are preserved; NULL or missing values inherit first_row_id + row_position. + ROW_ID = 1, + // Iceberg v3 row lineage metadata column `_last_updated_sequence_number`. + // Physical non-null values are preserved; NULL or missing values inherit the + // data file's last_updated_sequence_number. + LAST_UPDATED_SEQUENCE_NUMBER = 2, + // Doris internal Iceberg row locator column `__DORIS_ICEBERG_ROWID_COL__`. + // It is a struct used by delete/update/merge, not the Iceberg `_row_id`. + ICEBERG_ROWID = 3, +}; + +enum class FilterConversionType { + COPY_DIRECTLY, // filter can be copied directly from file layer without any change, e.g. column type and table type are the same and no complex nested projection is involved. + CAST_FILTER, // filter can be converted from file layer by adding a cast, e.g. column type is nullable but table type is not, or file column has a trivial nested projection but table column has a complex nested projection. + READER_EXPRESSION, + FINALIZE_ONLY, // filter cannot be converted to file layer and should be evaluated at table reader finalize phase, e.g. predicates on ICEBERG_ROW_ID column which is generated by IcebergReader. + CONSTANT, +}; + +// Nested global-to-local child mapping. The root index points either to a request-local slot or to +// a child id, depending on the owner. child_mapping keeps the recursive table-child to file-child +// relationship explicit instead of encoding it in ColumnMapping flags. +struct IndexMapping { + int32_t index = -1; + std::map> child_mapping; +}; + +// Recursive result produced after one table/global column is assigned to a file-local source. +struct ColumnMapResult { + std::optional local_column_id; + std::optional column_index; + std::optional mapping; +}; + +// Final mapping entry from one global result column to one file-local source. +struct ColumnMapEntry { + IndexMapping mapping; + DataTypePtr local_type; + DataTypePtr global_type; + FilterConversionType filter_conversion = FilterConversionType::FINALIZE_ONLY; +}; + +// Collection of final result-column mappings produced for one file/split. +struct ResultColumnMapping { + std::map global_to_local; +}; + +// Mapping result from one table column to one file column. +// This is the main boundary object between table-level schema semantics and file-local schema +// semantics. +struct ColumnMapping { + // Position of the top-level projected column in the table/global output block. Table-level + // filters and column predicates refer to this index after FileScannerV2 translates FE ids at + // the scanner boundary. + GlobalIndex global_index; + std::string table_column_name; + // File-reader local id for the mapped node. + // + // For a root mapping it is convertible to LocalColumnId. For a nested mapping it is the + // LocalColumnIndex child id under the parent projection. This is deliberately separated from + // ColumnDefinition::identifier, which is the table-to-file matching key such as Parquet/Iceberg + // field_id or column name. + // + // Empty means the table column is constant, missing, partition-only, or virtual. + std::optional file_local_id; + std::string file_column_name; + // Full file type/children before nested projection pruning. Used to rebuild projected types + // and to localize nested filters that reference children not present in the output projection. + DataTypePtr original_file_type; + std::vector original_file_children; + // File children after applying the scan projection. The order follows the file-local semantic + // schema, not table child order. TableReader uses this to map table-output children back to the + // file-local block layout when projection, predicate-only children, and schema evolution mix. + std::vector projected_file_children; + // Split/file-local constant entry when this mapping is produced from partition/default/virtual + // expression instead of physical file data. + std::optional constant_index; + // Effective file type after applying casts/remaps/nested projection pruning. + DataTypePtr file_type; + // Target table/global type after final materialization. + DataTypePtr table_type; + + // Final projection expression used to convert file-local values into table/global values, such + // as casts, defaults, partition values, generated columns, or complex-column remaps. + VExprContextSPtr projection; + + // Mapping tree for nested table children. The order follows table output children, while file + // children can be pruned/reordered through each child mapping's file-reader local id. + std::vector child_mappings; + // True when file value can be used directly as table value without cast or child remap. + bool is_trivial = false; + // How filters referencing this table/global column can be converted below table-reader + // finalize. This is metadata for localize_filters() and future constant-filter evaluation. + FilterConversionType filter_conversion = FilterConversionType::FINALIZE_ONLY; + TableVirtualColumnType virtual_column_type = TableVirtualColumnType::INVALID; + VExprContextSPtr default_expr; + + std::string debug_string() const; +}; + +struct TableColumnMapperOptions { + TableColumnMappingMode mode = TableColumnMappingMode::BY_FIELD_ID; + + std::string debug_string() const; +}; + +Status clone_table_expr_tree(const VExprSPtr& expr, VExprSPtr* cloned_expr); +const Field* find_partition_value(const ColumnDefinition& table_column, + const std::map& partition_values); + +// Generic mapping layer from table schema to file schema. +// Iceberg uses BY_FIELD_ID. Plain by-name formats can reuse this component as well, so keep this +// abstraction table-format neutral instead of making it Iceberg-only. +class TableColumnMapper { +public: + explicit TableColumnMapper(TableColumnMapperOptions options = {}) + : _options(std::move(options)) {} + virtual ~TableColumnMapper() = default; + + // Build column mappings from table schema to file schema. + // The resulting ColumnMapping describes how each table column is produced from a file column, + // a constant, or an expression. Later projection, filter localization, and table-block + // finalization should all reuse the same mapping. + virtual Status create_mapping(const std::vector& projected_columns, + const std::map& partition_values, + const std::vector& file_schema); + + // Convert a table-level scan request into a file-local scan request. table_filters preserve + // row-level filtering semantics and are rewritten as file-local conjuncts. table_column_predicates + // are converted only into file-layer pruning hints and do not participate in batch row + // filtering. + virtual Status create_scan_request(const std::vector& table_filters, + const TableColumnPredicates& table_column_predicates, + const std::vector& projected_columns, + FileScanRequest* file_request, + RuntimeState* runtime_state = nullptr); + + // Localize table-level filters to the file schema. + // Trivial mappings can copy structured predicates directly. Type changes may be localized with + // a safe cast. Expressions that cannot be pushed down safely should be handled through + // reader_expression_map or table-level finalize/filter fallback. + virtual Status localize_filters(const std::vector& table_filters, + const TableColumnPredicates& table_column_predicates, + FileScanRequest* file_request, + RuntimeState* runtime_state = nullptr); + void clear() { + _mappings.clear(); + _hidden_mappings.clear(); + _constant_map.clear(); + _filter_entries.clear(); + _file_schema.clear(); + _partition_values.clear(); + } + const std::vector& mappings() const { return _mappings; } + const std::map& filter_entries() const { return _filter_entries; } + const ConstantMap& constant_map() const { return _constant_map; } + std::string debug_string() const; + +protected: + // Columnar readers such as Parquet can read predicate columns first, evaluate row filters, and + // lazily read the rest. Row-oriented readers such as CSV/Text materialize one row at a time and + // should keep all required columns in one scan list. + virtual bool enable_lazy_materialization() const { return true; } + // File-layer column predicate filters are reader-specific pruning hints. Parquet consumes them + // for row-group/page-index/statistics pruning; simple delimited readers do not. + virtual bool enable_column_predicate_filters() const { return true; } + // Row-oriented readers such as CSV/Text cannot physically read only a nested child from one + // delimited text field. They must scan the whole complex top-level field and let TableReader + // rematerialize the requested table child after row-level filters have run. + virtual bool force_full_complex_scan_projection() const { return false; } + + const ColumnDefinition* _find_file_field( + const ColumnDefinition& table_column, + const std::vector& file_schema) const; + Status _create_direct_mapping(const ColumnDefinition& table_column, + const ColumnDefinition& file_field, ColumnMapping* mapping) const; + + Status _create_by_index_mapping(const ColumnDefinition& table_column, + const std::vector& file_schema, + ColumnMapping* mapping); + Status _build_filter_entries(const FileScanRequest& file_request); + Status _build_result_column_mapping(const FileScanRequest& file_request); + + void _set_constant_mapping(ColumnMapping* mapping, VExprContextSPtr expr); + Status _create_mapping_for_column(const ColumnDefinition& table_column, + GlobalIndex global_index, ColumnMapping* mapping); + Status _create_hidden_filter_mapping(const ColumnDefinition& table_column, + GlobalIndex global_index, ColumnMapping* mapping); + Status _build_hidden_filter_mappings(const std::vector& table_filters); + std::vector _filter_visible_mappings() const; + + ColumnMapping* _find_mapping(GlobalIndex global_index); + ColumnMapping* _find_filter_mapping(GlobalIndex global_index); + + TableColumnMapperOptions _options; + // Column mapping for each projected column, in the same order as projected_columns. Each entry + // describes how to get one table/global column from file-local sources, and carries metadata + // for filter localization and result finalize. + std::vector _mappings; + // Predicate-only top-level columns are not output projection columns, so keep their mappings + // here. They are visible only to filter localization and file-reader predicate construction. + std::vector _hidden_mappings; + std::map _filter_entries; + ConstantMap _constant_map; + // Split-local schema state retained from create_mapping() so create_scan_request() can build + // hidden mappings for top-level filter slots that are absent from projected_columns. + std::vector _file_schema; + std::map _partition_values; +}; + +// Parquet consumes the full FileScanRequest shape: predicate columns for lazy materialization and +// column_predicate_filters for statistics/page-index pruning. +class ParquetColumnMapper final : public TableColumnMapper { +public: + using TableColumnMapper::TableColumnMapper; +}; + +// Mapper for readers that always materialize every required file column before filtering. The +// table-to-file schema mapping is still generic, but the FileScanRequest layout is simpler: +// predicate_columns and column_predicate_filters are not populated. +class MaterializedColumnMapper final : public TableColumnMapper { +public: + using TableColumnMapper::TableColumnMapper; + +protected: + bool enable_lazy_materialization() const override { return false; } + bool enable_column_predicate_filters() const override { return false; } + bool force_full_complex_scan_projection() const override { return true; } +}; + +} // namespace doris::format diff --git a/be/src/format_v2/column_mapper_nested.cpp b/be/src/format_v2/column_mapper_nested.cpp new file mode 100644 index 00000000000000..0e3539242fff26 --- /dev/null +++ b/be/src/format_v2/column_mapper_nested.cpp @@ -0,0 +1,1050 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/column_mapper_nested.h" + +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/exception.h" +#include "core/assert_cast.h" +#include "core/data_type/convert_field_to_type.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" +#include "core/data_type/primitive_type.h" +#include "exprs/create_predicate_function.h" +#include "exprs/vexpr.h" +#include "exprs/vin_predicate.h" +#include "format_v2/expr/cast.h" +#include "gen_cpp/Exprs_types.h" +#include "storage/predicate/null_predicate.h" +#include "storage/predicate/predicate_creator.h" + +namespace doris::format { + +namespace { + +static bool is_cast_expr(const VExprSPtr& expr) { + return dynamic_cast(expr.get()) != nullptr; +} + +static bool is_binary_comparison_predicate(const VExprSPtr& expr) { + if (expr == nullptr || expr->get_num_children() != 2 || + (expr->node_type() != TExprNodeType::BINARY_PRED && + expr->node_type() != TExprNodeType::NULL_AWARE_BINARY_PRED)) { + return false; + } + switch (expr->op()) { + case TExprOpcode::EQ: + case TExprOpcode::EQ_FOR_NULL: + case TExprOpcode::NE: + case TExprOpcode::GE: + case TExprOpcode::GT: + case TExprOpcode::LE: + case TExprOpcode::LT: + return true; + default: + return false; + } +} + +static bool is_null_predicate_function(const VExprSPtr& expr, bool* is_null) { + DORIS_CHECK(is_null != nullptr); + if (expr == nullptr || expr->node_type() != TExprNodeType::FUNCTION_CALL || + expr->get_num_children() != 1) { + return false; + } + if (expr->fn().name.function_name == "is_null_pred") { + *is_null = true; + return true; + } + if (expr->fn().name.function_name == "is_not_null_pred") { + *is_null = false; + return true; + } + return false; +} + +static bool is_signed_integer_type(PrimitiveType type) { + switch (type) { + case TYPE_TINYINT: + case TYPE_SMALLINT: + case TYPE_INT: + case TYPE_BIGINT: + case TYPE_LARGEINT: + return true; + default: + return false; + } +} + +static int primitive_integer_width(PrimitiveType type) { + switch (type) { + case TYPE_TINYINT: + return 1; + case TYPE_SMALLINT: + return 2; + case TYPE_INT: + return 4; + case TYPE_BIGINT: + return 8; + case TYPE_LARGEINT: + return 16; + default: + return 0; + } +} + +static bool is_decimal_type(PrimitiveType type) { + switch (type) { + case TYPE_DECIMAL32: + case TYPE_DECIMAL64: + case TYPE_DECIMALV2: + case TYPE_DECIMAL128I: + case TYPE_DECIMAL256: + return true; + default: + return false; + } +} + +static bool is_order_preserving_safe_cast(const DataTypePtr& from_type, + const DataTypePtr& to_type) { + if (from_type == nullptr || to_type == nullptr) { + return false; + } + const auto from_nested_type = remove_nullable(from_type); + const auto to_nested_type = remove_nullable(to_type); + if (from_nested_type->equals(*to_nested_type)) { + return true; + } + + const auto from_primitive_type = from_nested_type->get_primitive_type(); + const auto to_primitive_type = to_nested_type->get_primitive_type(); + if (is_signed_integer_type(from_primitive_type) && is_signed_integer_type(to_primitive_type)) { + return primitive_integer_width(to_primitive_type) >= + primitive_integer_width(from_primitive_type); + } + if (from_primitive_type == TYPE_FLOAT && to_primitive_type == TYPE_DOUBLE) { + return true; + } + if (is_decimal_type(from_primitive_type) && is_decimal_type(to_primitive_type)) { + return from_nested_type->get_scale() == to_nested_type->get_scale() && + to_nested_type->get_precision() >= from_nested_type->get_precision(); + } + return false; +} + +static bool parse_struct_child_selector(const VExprSPtr& expr, StructChildSelector* selector) { + DORIS_CHECK(selector != nullptr); + if (expr == nullptr || !expr->is_literal()) { + return false; + } + const Field field = literal_field(expr); + switch (field.get_type()) { + case TYPE_STRING: + case TYPE_CHAR: + case TYPE_VARCHAR: + selector->by_name = true; + selector->name = std::string(field.as_string_view()); + return true; + case TYPE_BOOLEAN: + selector->by_name = false; + selector->ordinal = field.get() ? 1 : 0; + return selector->ordinal > 0; + case TYPE_TINYINT: + selector->by_name = false; + if (field.get() <= 0) { + return false; + } + selector->ordinal = cast_set(field.get()); + return true; + case TYPE_SMALLINT: + selector->by_name = false; + if (field.get() <= 0) { + return false; + } + selector->ordinal = cast_set(field.get()); + return true; + case TYPE_INT: + selector->by_name = false; + if (field.get() <= 0) { + return false; + } + selector->ordinal = cast_set(field.get()); + return true; + case TYPE_BIGINT: + selector->by_name = false; + if (field.get() <= 0) { + return false; + } + selector->ordinal = cast_set(field.get()); + return true; + default: + return false; + } +} + +static bool extract_nested_struct_path(const VExprSPtr& expr, NestedStructPath* path) { + DORIS_CHECK(path != nullptr); + if (!is_struct_element_expr(expr)) { + return false; + } + + // Process for element_at(struct, 'field') or element_at(struct, 1) expression. + StructChildSelector selector; + if (!parse_struct_child_selector(expr->children()[1], &selector)) { + return false; + } + + const auto& parent = expr->children()[0]; + if (parent->is_slot_ref()) { + const auto* slot_ref = assert_cast(parent.get()); + path->root_global_index = slot_ref_global_index(*slot_ref); + path->selectors.clear(); + path->selectors.push_back(std::move(selector)); + return true; + } + + // Process for element_at(element_at(struct, 'field'), 'field') or + // element_at(element_at(struct, 1), 1) expression. + if (!extract_nested_struct_path(parent, path)) { + return false; + } + path->selectors.push_back(std::move(selector)); + return true; +} + +static bool extract_nested_struct_path_for_pruning(const VExprSPtr& expr, NestedStructPath* path) { + DORIS_CHECK(path != nullptr); + // Simple `ELEMENT_AT` + if (extract_nested_struct_path(expr, path)) { + return true; + } + + // `ELEMENT_AT` with `CAST` + if (!is_cast_expr(expr) || expr->get_num_children() != 1) { + return false; + } + const auto& child = expr->children()[0]; + if (!is_order_preserving_safe_cast(child->data_type(), expr->data_type())) { + return false; + } + // A safe widening cast is null-preserving and keeps the comparison ordering of the nested + // primitive leaf, so file-layer pruning can target the original leaf statistics. The row-level + // filter still evaluates the original cast expression after read. + return extract_nested_struct_path_for_pruning(child, path); +} + +static const ColumnDefinition* resolve_file_child(const std::vector& children, + const StructChildSelector& selector) { + if (selector.by_name) { + const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) { + return child.name == selector.name; + }); + return child_it == children.end() ? nullptr : &*child_it; + } + if (selector.ordinal == 0 || selector.ordinal > children.size()) { + return nullptr; + } + return &children[selector.ordinal - 1]; +} + +static const DataTypeStruct* struct_type_or_null(const DataTypePtr& type) { + if (type == nullptr) { + return nullptr; + } + const auto nested_type = remove_nullable(type); + if (nested_type->get_primitive_type() != TYPE_STRUCT) { + return nullptr; + } + return assert_cast(nested_type.get()); +} + +static std::optional struct_child_index(const ColumnMapping& mapping, + const StructChildSelector& selector) { + const auto* struct_type = struct_type_or_null(mapping.table_type); + if (struct_type == nullptr) { + return std::nullopt; + } + if (selector.by_name) { + const auto position = struct_type->try_get_position_by_name(selector.name); + if (!position.has_value()) { + return std::nullopt; + } + return cast_set(*position); + } + if (selector.ordinal == 0 || selector.ordinal > struct_type->get_elements().size()) { + return std::nullopt; + } + return cast_set(selector.ordinal - 1); +} + +// Get the global child index for a child mapping. If the mapping's table type is struct, resolve +// the child index by the child mapping's table column name; otherwise, use the fallback child index. +static int32_t child_mapping_global_index(const ColumnMapping& mapping, + const ColumnMapping& child_mapping, + size_t fallback_child_idx) { + const auto* struct_type = struct_type_or_null(mapping.table_type); + if (struct_type == nullptr) { + return cast_set(fallback_child_idx); + } + const auto position = struct_type->try_get_position_by_name(child_mapping.table_column_name); + DORIS_CHECK(position.has_value()) << "Cannot find child '" << child_mapping.table_column_name + << "' in table type " << mapping.table_type->get_name(); + return cast_set(*position); +} + +static const ColumnMapping* resolve_mapped_child(const ColumnMapping& mapping, + int32_t global_child_index) { + for (size_t child_idx = 0; child_idx < mapping.child_mappings.size(); ++child_idx) { + const auto& child_mapping = mapping.child_mappings[child_idx]; + if (child_mapping_global_index(mapping, child_mapping, child_idx) == global_child_index) { + return &child_mapping; + } + } + return nullptr; +} + +enum class NestedProjectionResolveResult { + RESOLVED, + NOT_REPRESENTED, + MISSING_FILE_CHILD, +}; + +// Resolve a table-side nested struct path through the existing ColumnMapping tree and build the +// corresponding file-local projection. For example, if table column `s` has children +// `{a, renamed_b}` and file column `s` has children `{a, b}`, the filter path +// `struct_element(s, 'renamed_b')` is resolved to the file projection `s -> b` by following the +// child mapping instead of matching the table child name against the file schema. Return +// MISSING_FILE_CHILD when ColumnMapping explicitly says a table child is absent from this file; in +// that case callers must not fall back to schema-name lookup, because Iceberg can drop a field and +// later add a different field with the same name. +static NestedProjectionResolveResult resolve_nested_projection_with_mapping( + const NestedStructPath& path, const std::vector& mappings, + LocalColumnIndex* root_projection) { + DORIS_CHECK(root_projection != nullptr); + *root_projection = {}; + if (path.selectors.empty()) { + return NestedProjectionResolveResult::NOT_REPRESENTED; + } + const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) { + return mapping.global_index == path.root_global_index; + }); + if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value()) { + return NestedProjectionResolveResult::NOT_REPRESENTED; + } + + *root_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id); + auto* current_projection = root_projection; + const auto* current_mapping = &*mapping_it; + + // Traverse the ColumnMapping tree according to the table-side struct selectors and emit the + // corresponding file-local child ids. A missing child mapping means this predicate-only path + // may need schema fallback; an existing child mapping without a file id means the table child + // is genuinely absent from this file and must stay above the file reader. + for (size_t selector_idx = 0; selector_idx < path.selectors.size(); ++selector_idx) { + const auto global_child_index = + struct_child_index(*current_mapping, path.selectors[selector_idx]); + if (!global_child_index.has_value()) { + *root_projection = {}; + return NestedProjectionResolveResult::NOT_REPRESENTED; + } + const auto* child_mapping = resolve_mapped_child(*current_mapping, *global_child_index); + if (child_mapping == nullptr) { + *root_projection = {}; + return NestedProjectionResolveResult::NOT_REPRESENTED; + } + if (!child_mapping->file_local_id.has_value()) { + *root_projection = {}; + return NestedProjectionResolveResult::MISSING_FILE_CHILD; + } + + auto child_projection = LocalColumnIndex::partial_local(*child_mapping->file_local_id); + child_projection.project_all_children = selector_idx + 1 == path.selectors.size(); + current_projection->children.push_back(std::move(child_projection)); + current_projection = ¤t_projection->children.back(); + current_mapping = child_mapping; + } + return NestedProjectionResolveResult::RESOLVED; +} + +static bool table_root_is_struct(const ColumnMapping& mapping) { + return struct_type_or_null(mapping.table_type) != nullptr; +} + +static const std::vector& scan_file_children(const ColumnMapping& mapping) { + return !mapping.projected_file_children.empty() ? mapping.projected_file_children + : mapping.original_file_children; +} + +static const ColumnDefinition* resolve_file_leaf_from_projection( + const std::vector& children, const LocalColumnIndex& projection) { + const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) { + return child.file_local_id() == projection.local_id(); + }); + if (child_it == children.end()) { + return nullptr; + } + if (projection.children.empty()) { + return &*child_it; + } + if (projection.children.size() != 1) { + return nullptr; + } + return resolve_file_leaf_from_projection(child_it->children, projection.children[0]); +} + +static bool collect_file_child_names_from_projection(const std::vector& children, + const LocalColumnIndex& projection, + std::vector* file_child_names, + std::vector* file_child_types) { + DORIS_CHECK(file_child_names != nullptr); + DORIS_CHECK(file_child_types != nullptr); + const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) { + return child.file_local_id() == projection.local_id(); + }); + if (child_it == children.end()) { + return false; + } + file_child_names->push_back(child_it->name); + file_child_types->push_back(child_it->type); + if (projection.children.empty()) { + return true; + } + if (projection.children.size() != 1) { + return false; + } + return collect_file_child_names_from_projection(child_it->children, projection.children[0], + file_child_names, file_child_types); +} + +struct NestedPredicateTarget { + LocalColumnIndex file_projection; + FileNestedPredicateTarget file_target; + std::string leaf_name; + DataTypePtr leaf_type; +}; + +static std::unique_ptr build_struct_predicate_target_from_projection( + const std::vector& children, const LocalColumnIndex& projection) { + const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) { + return child.file_local_id() == projection.local_id(); + }); + if (child_it == children.end()) { + return nullptr; + } + std::unique_ptr nested_child; + if (!projection.children.empty()) { + if (projection.children.size() != 1) { + return nullptr; + } + nested_child = build_struct_predicate_target_from_projection(child_it->children, + projection.children[0]); + if (nested_child == nullptr) { + return nullptr; + } + } + return std::make_unique(child_it->file_local_id(), child_it->name, + std::move(nested_child)); +} + +static bool build_struct_predicate_target(const ColumnMapping& root_mapping, + const LocalColumnIndex& root_projection, + FileNestedPredicateTarget* file_target) { + DORIS_CHECK(file_target != nullptr); + if (!root_projection.column_id().is_valid() || root_projection.children.size() != 1) { + return false; + } + auto struct_target = build_struct_predicate_target_from_projection( + root_mapping.original_file_children, root_projection.children[0]); + if (struct_target == nullptr) { + return false; + } + *file_target = FileNestedPredicateTarget(root_projection.column_id(), std::move(struct_target)); + return true; +} + +static bool resolve_nested_predicate_target(const NestedStructPath& path, + const std::vector& mappings, + NestedPredicateTarget* target) { + DORIS_CHECK(target != nullptr); + ResolvedNestedStructPath resolved; + if (!resolve_nested_struct_path_for_file(path, mappings, &resolved)) { + return false; + } + + const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) { + return mapping.global_index == path.root_global_index; + }); + if (mapping_it == mappings.end() || resolved.file_projection.children.size() != 1) { + return false; + } + const auto* file_leaf = resolve_file_leaf_from_projection(mapping_it->original_file_children, + resolved.file_projection.children[0]); + if (file_leaf == nullptr || file_leaf->type == nullptr) { + return false; + } + target->leaf_type = remove_nullable(file_leaf->type); + if (is_complex_type(target->leaf_type->get_primitive_type())) { + return false; + } + target->leaf_name = file_leaf->name; + target->file_projection = std::move(resolved.file_projection); + if (!build_struct_predicate_target(*mapping_it, target->file_projection, + &target->file_target)) { + return false; + } + return true; +} + +static VExprSPtr original_table_literal_for_nested_predicate(const VExprSPtr& literal_expr) { + DORIS_CHECK(literal_expr != nullptr); + DORIS_CHECK(literal_expr->is_literal()); + const auto* rewritten_literal = dynamic_cast(literal_expr.get()); + if (rewritten_literal == nullptr) { + return literal_expr; + } + return VLiteral::create_shared(rewritten_literal->original_type(), + rewritten_literal->original_field()); +} + +static std::optional to_column_predicate_type(TExprOpcode::type opcode) { + switch (opcode) { + case TExprOpcode::EQ: + return PredicateType::EQ; + case TExprOpcode::NE: + return PredicateType::NE; + case TExprOpcode::GT: + return PredicateType::GT; + case TExprOpcode::GE: + return PredicateType::GE; + case TExprOpcode::LT: + return PredicateType::LT; + case TExprOpcode::LE: + return PredicateType::LE; + default: + return std::nullopt; + } +} + +static TExprOpcode::type reverse_comparison_opcode(TExprOpcode::type opcode) { + switch (opcode) { + case TExprOpcode::GT: + return TExprOpcode::LT; + case TExprOpcode::GE: + return TExprOpcode::LE; + case TExprOpcode::LT: + return TExprOpcode::GT; + case TExprOpcode::LE: + return TExprOpcode::GE; + default: + return opcode; + } +} + +static std::shared_ptr create_comparison_column_predicate( + PredicateType predicate_type, uint32_t column_id, const std::string& column_name, + const DataTypePtr& data_type, const Field& value) { + switch (predicate_type) { + case PredicateType::EQ: + return create_comparison_predicate(column_id, column_name, data_type, + value, false); + case PredicateType::NE: + return create_comparison_predicate(column_id, column_name, data_type, + value, false); + case PredicateType::GT: + return create_comparison_predicate(column_id, column_name, data_type, + value, false); + case PredicateType::GE: + return create_comparison_predicate(column_id, column_name, data_type, + value, false); + case PredicateType::LT: + return create_comparison_predicate(column_id, column_name, data_type, + value, false); + case PredicateType::LE: + return create_comparison_predicate(column_id, column_name, data_type, + value, false); + default: + return nullptr; + } +} + +static bool extract_child_id_path_from_projection(const LocalColumnIndex& root_projection, + std::vector* file_child_id_path) { + DORIS_CHECK(file_child_id_path != nullptr); + file_child_id_path->clear(); + const auto* current_projection = &root_projection; + while (!current_projection->children.empty()) { + if (current_projection->children.size() != 1) { + file_child_id_path->clear(); + return false; + } + current_projection = ¤t_projection->children[0]; + file_child_id_path->push_back(current_projection->local_id()); + } + return !file_child_id_path->empty(); +} + +static std::shared_ptr build_nested_comparison_predicate( + const VExprSPtr& literal_expr, TExprOpcode::type opcode, LocalColumnId root_file_column_id, + const std::string& leaf_name, const DataTypePtr& file_leaf_type) { + if (literal_expr == nullptr || !literal_expr->is_literal() || file_leaf_type == nullptr) { + return nullptr; + } + const auto predicate_type = to_column_predicate_type(opcode); + if (!predicate_type.has_value()) { + return nullptr; + } + const auto original_literal = original_table_literal_for_nested_predicate(literal_expr); + const Field original_field = literal_field(original_literal); + Field file_field; + try { + convert_field_to_type(original_field, *file_leaf_type, &file_field, + original_literal->data_type().get()); + } catch (const Exception&) { + return nullptr; + } + if (file_field.is_null()) { + return nullptr; + } + try { + return create_comparison_column_predicate(*predicate_type, + cast_set(root_file_column_id.value()), + leaf_name, file_leaf_type, file_field); + } catch (const Exception&) { + return nullptr; + } +} + +static std::shared_ptr build_nested_in_list_predicate( + const VExprSPtrs& literal_exprs, LocalColumnId root_file_column_id, + const std::string& leaf_name, const DataTypePtr& file_leaf_type) { + if (literal_exprs.empty() || file_leaf_type == nullptr) { + return nullptr; + } + + auto value_column = file_leaf_type->create_column(); + for (const auto& literal_expr : literal_exprs) { + if (literal_expr == nullptr || !literal_expr->is_literal()) { + return nullptr; + } + const auto original_literal = original_table_literal_for_nested_predicate(literal_expr); + const Field original_field = literal_field(original_literal); + Field file_field; + try { + convert_field_to_type(original_field, *file_leaf_type, &file_field, + original_literal->data_type().get()); + } catch (const Exception&) { + return nullptr; + } + if (file_field.is_null()) { + return nullptr; + } + value_column->insert(file_field); + } + + std::shared_ptr values; + try { + values.reset(create_set(file_leaf_type->get_primitive_type(), literal_exprs.size(), false)); + ColumnPtr value_column_ptr = std::move(value_column); + values->insert_range_from(value_column_ptr, 0, value_column_ptr->size()); + return create_in_list_predicate( + cast_set(root_file_column_id.value()), leaf_name, file_leaf_type, values, + false); + } catch (const Exception&) { + return nullptr; + } +} + +static std::shared_ptr build_nested_null_predicate( + bool is_null, LocalColumnId root_file_column_id, const std::string& leaf_name, + const DataTypePtr& file_leaf_type) { + if (file_leaf_type == nullptr) { + return nullptr; + } + const auto leaf_primitive_type = remove_nullable(file_leaf_type)->get_primitive_type(); + return NullPredicate::create_shared(cast_set(root_file_column_id.value()), leaf_name, + is_null, leaf_primitive_type); +} + +static bool set_nested_column_filter_target(const NestedPredicateTarget& target, + FileColumnPredicateFilter* column_filter) { + DORIS_CHECK(column_filter != nullptr); + std::vector file_child_id_path; + if (!extract_child_id_path_from_projection(target.file_projection, &file_child_id_path)) { + return false; + } + column_filter->file_column_id = target.file_projection.column_id(); + column_filter->file_child_id_path = std::move(file_child_id_path); + column_filter->target = target.file_target; + return true; +} + +static bool extract_nested_binary_comparison_filter(const VExprSPtr& expr, + const std::vector& mappings, + FileColumnPredicateFilter* column_filter) { + DORIS_CHECK(column_filter != nullptr); + if (!is_binary_comparison_predicate(expr)) { + return false; + } + NestedStructPath path; + VExprSPtr literal_expr; + TExprOpcode::type opcode = expr->op(); + if (extract_nested_struct_path_for_pruning(expr->children()[0], &path) && + expr->children()[1]->is_literal()) { + literal_expr = expr->children()[1]; + } else if (extract_nested_struct_path_for_pruning(expr->children()[1], &path) && + expr->children()[0]->is_literal()) { + literal_expr = expr->children()[0]; + opcode = reverse_comparison_opcode(opcode); + } else { + return false; + } + + NestedPredicateTarget target; + if (!resolve_nested_predicate_target(path, mappings, &target)) { + return false; + } + auto predicate = build_nested_comparison_predicate(literal_expr, opcode, + target.file_projection.column_id(), + target.leaf_name, target.leaf_type); + if (predicate == nullptr) { + return false; + } + if (!set_nested_column_filter_target(target, column_filter)) { + return false; + } + column_filter->predicates.push_back(std::move(predicate)); + return true; +} + +static bool extract_nested_in_list_filter(const VExprSPtr& expr, + const std::vector& mappings, + FileColumnPredicateFilter* column_filter) { + DORIS_CHECK(column_filter != nullptr); + if (expr == nullptr || expr->node_type() != TExprNodeType::IN_PRED || + expr->get_num_children() < 2) { + return false; + } + if (const auto* in_predicate = dynamic_cast(expr.get()); + in_predicate != nullptr && in_predicate->is_not_in()) { + return false; + } + + NestedStructPath path; + if (!extract_nested_struct_path_for_pruning(expr->children()[0], &path)) { + return false; + } + + VExprSPtrs literal_exprs; + literal_exprs.reserve(expr->get_num_children() - 1); + for (size_t child_idx = 1; child_idx < expr->children().size(); ++child_idx) { + if (!expr->children()[child_idx]->is_literal()) { + return false; + } + literal_exprs.push_back(expr->children()[child_idx]); + } + + NestedPredicateTarget target; + if (!resolve_nested_predicate_target(path, mappings, &target)) { + return false; + } + auto predicate = build_nested_in_list_predicate( + literal_exprs, target.file_projection.column_id(), target.leaf_name, target.leaf_type); + if (predicate == nullptr) { + return false; + } + if (!set_nested_column_filter_target(target, column_filter)) { + return false; + } + column_filter->predicates.push_back(std::move(predicate)); + return true; +} + +static bool extract_nested_null_filter(const VExprSPtr& expr, + const std::vector& mappings, + FileColumnPredicateFilter* column_filter) { + DORIS_CHECK(column_filter != nullptr); + bool is_null = false; + if (!is_null_predicate_function(expr, &is_null)) { + return false; + } + + NestedStructPath path; + if (!extract_nested_struct_path_for_pruning(expr->children()[0], &path)) { + return false; + } + + NestedPredicateTarget target; + if (!resolve_nested_predicate_target(path, mappings, &target)) { + return false; + } + auto predicate = build_nested_null_predicate(is_null, target.file_projection.column_id(), + target.leaf_name, target.leaf_type); + if (predicate == nullptr) { + return false; + } + if (!set_nested_column_filter_target(target, column_filter)) { + return false; + } + column_filter->predicates.push_back(std::move(predicate)); + return true; +} + +} // namespace + +SplitLocalFileLiteral::SplitLocalFileLiteral(const DataTypePtr& file_type, const Field& file_field, + DataTypePtr original_type, Field original_field) + : VLiteral(file_type, file_field), + _original_type(std::move(original_type)), + _original_field(std::move(original_field)) {} + +GlobalIndex slot_ref_global_index(const VSlotRef& slot_ref) { + DORIS_CHECK(slot_ref.column_id() >= 0); + return GlobalIndex(cast_set(slot_ref.column_id())); +} + +bool is_struct_element_expr(const VExprSPtr& expr) { + if (expr == nullptr || expr->get_num_children() != 2) { + return false; + } + const auto& function_name = expr->fn().name.function_name; + if (function_name == "struct_element") { + return true; + } + if (function_name != "element_at") { + return false; + } + const auto& parent_type = expr->children()[0]->data_type(); + return parent_type != nullptr && + remove_nullable(parent_type)->get_primitive_type() == TYPE_STRUCT; +} + +Field literal_field(const VExprSPtr& literal_expr) { + DORIS_CHECK(literal_expr != nullptr); + DORIS_CHECK(literal_expr->is_literal()); + const auto* literal = dynamic_cast(literal_expr.get()); + DORIS_CHECK(literal != nullptr); + Field field; + literal->get_column_ptr()->get(0, field); + return field; +} + +bool resolve_nested_struct_path_for_file(const NestedStructPath& path, + const std::vector& mappings, + ResolvedNestedStructPath* resolved, + bool require_scan_projection) { + DORIS_CHECK(resolved != nullptr); + *resolved = {}; + const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) { + return mapping.global_index == path.root_global_index; + }); + if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value() || + path.selectors.empty()) { + return false; + } + + // Prefer ColumnMapping over schema-name lookup. This is the only path that can correctly + // localize renamed Iceberg fields: a table filter `element_at(s, 'renamed_b')` must become a + // file filter on physical child `b`, even if the old file type is `STRUCT`. + const auto mapping_result = + resolve_nested_projection_with_mapping(path, mappings, &resolved->file_projection); + if (mapping_result == NestedProjectionResolveResult::MISSING_FILE_CHILD) { + return false; + } + if (mapping_result == NestedProjectionResolveResult::NOT_REPRESENTED) { + if (!table_root_is_struct(*mapping_it)) { + return false; + } + LocalColumnIndex child_projection; + if (!build_file_child_projection_from_schema(mapping_it->original_file_children, + path.selectors, &child_projection) + .ok() || + child_projection.local_id() < 0) { + return false; + } + resolved->file_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id); + resolved->file_projection.children.push_back(std::move(child_projection)); + } + + if (resolved->file_projection.children.size() != 1) { + *resolved = {}; + return false; + } + // When rewriting the final localized element_at chain, it executes on the file column produced + // by this scan, so the intermediate return types must match the projected file shape, not the + // full historical file schema. Example: + // SELECT s.c WHERE element_at(element_at(s, 'b'), 'cc') LIKE 'NestedC%' + // reads only b.cc and c; the inner element_at(s, 'b') returns Struct(cc), not + // Struct(cc, new_dd). + // + // Earlier projection collection also calls this resolver before filter-only children have been + // merged into the scan projection. That phase only needs the file path, so it still resolves + // names/types from the original file schema. + const auto& child_source = require_scan_projection ? scan_file_children(*mapping_it) + : mapping_it->original_file_children; + if (!collect_file_child_names_from_projection( + child_source, resolved->file_projection.children[0], &resolved->file_child_names, + &resolved->file_child_types) || + resolved->file_child_names.size() != path.selectors.size() || + resolved->file_child_types.size() != path.selectors.size()) { + *resolved = {}; + return false; + } + return true; +} + +bool resolve_nested_struct_expr_for_file(const VExprSPtr& expr, + const std::vector& mappings, + ResolvedNestedStructPath* resolved) { + DORIS_CHECK(resolved != nullptr); + NestedStructPath path; + if (!extract_nested_struct_path(expr, &path)) { + *resolved = {}; + return false; + } + return resolve_nested_struct_path_for_file(path, mappings, resolved, true); +} + +// Collect nested struct leaf references that can be turned into file-reader projections and +// primitive pruning predicates. For example, from `s.a > 1 AND element_at(s, 'b') = 2`, this +// records two paths rooted at `s`: `s -> a` and `s -> b`. Non-struct expressions are traversed +// recursively, while a recognized struct path is emitted once so the caller can merge it into the +// scan projection for that top-level file column. +void collect_nested_struct_paths(const VExprSPtr& expr, std::vector* paths) { + DORIS_CHECK(paths != nullptr); + if (expr == nullptr) { + return; + } + NestedStructPath path; + if (extract_nested_struct_path_for_pruning(expr, &path)) { + paths->push_back(std::move(path)); + return; + } + for (const auto& child : expr->children()) { + collect_nested_struct_paths(child, paths); + } +} + +std::vector present_child_mappings_in_file_order( + const std::vector& child_mappings) { + std::vector result; + result.reserve(child_mappings.size()); + for (const auto& child_mapping : child_mappings) { + if (child_mapping.file_local_id.has_value()) { + result.push_back(&child_mapping); + } + } + std::ranges::sort(result, [](const ColumnMapping* lhs, const ColumnMapping* rhs) { + DORIS_CHECK(lhs->file_local_id.has_value()); + DORIS_CHECK(rhs->file_local_id.has_value()); + return *lhs->file_local_id < *rhs->file_local_id; + }); + return result; +} + +// Build the nested child projection under a top-level file column by walking file schema children +// directly. The returned projection does not include the root column id; callers attach it under a +// `LocalColumnIndex::partial_local(root_id)` when merging into the scan request. +Status build_file_child_projection_from_schema(const std::vector& children, + std::span selectors, + LocalColumnIndex* projection) { + DORIS_CHECK(projection != nullptr); + if (selectors.empty()) { + return Status::InvalidArgument("Nested struct selector path is empty"); + } + const auto* child = resolve_file_child(children, selectors.front()); + if (child == nullptr) { + return Status::OK(); + } + *projection = LocalColumnIndex::local(child->file_local_id()); + projection->project_all_children = selectors.size() == 1; + projection->children.clear(); + if (selectors.size() == 1) { + return Status::OK(); + } + if (child->children.empty() || + remove_nullable(child->type)->get_primitive_type() != TYPE_STRUCT) { + *projection = LocalColumnIndex {}; + return Status::OK(); + } + LocalColumnIndex child_projection; + RETURN_IF_ERROR(build_file_child_projection_from_schema(child->children, selectors.subspan(1), + &child_projection)); + if (child_projection.local_id() < 0) { + *projection = LocalColumnIndex {}; + return Status::OK(); + } + projection->children.push_back(std::move(child_projection)); + return Status::OK(); +} + +// Merge predicates that target the same physical file column or nested leaf. For example, +// `WHERE s.b > 1 AND s.b < 10` produces two predicates for the same target `s -> b`; keeping them +// in one FileColumnPredicateFilter lets the file reader apply both pruning checks to the same leaf +// instead of carrying duplicate target entries. +void merge_column_predicate_filter(FileColumnPredicateFilter column_filter, + std::vector* filters) { + DORIS_CHECK(filters != nullptr); + auto existing_filter_it = std::ranges::find_if(*filters, [&](const auto& existing_filter) { + return existing_filter.same_target_as(column_filter); + }); + if (existing_filter_it == filters->end()) { + filters->push_back(std::move(column_filter)); + return; + } + existing_filter_it->predicates.insert(existing_filter_it->predicates.end(), + column_filter.predicates.begin(), + column_filter.predicates.end()); +} + +// Extract file-column pruning predicates from localized row-level conjuncts that reference nested +// struct leaves. This is separate from file_request->conjuncts: conjuncts do row filtering, while +// FileColumnPredicateFilter carries primitive leaf predicates for file/page/statistics pruning. +// +// Example: for `WHERE s.b.c > 10 AND element_at(s, 'd') IS NOT NULL`, this function emits pruning +// filters for the nested targets `s -> b -> c` and `s -> d`. The caller only invokes it after +// table_filter_has_only_local_entries() succeeds, so each root slot already has a file-local scan +// source in _filter_entries. +void collect_nested_column_predicate_filters(const VExprSPtr& expr, + const std::vector& mappings, + std::vector* filters) { + DORIS_CHECK(filters != nullptr); + if (expr == nullptr) { + return; + } + if (expr->node_type() == TExprNodeType::COMPOUND_PRED && + expr->op() == TExprOpcode::COMPOUND_AND) { + for (const auto& child : expr->children()) { + collect_nested_column_predicate_filters(child, mappings, filters); + } + return; + } + FileColumnPredicateFilter column_filter; + if (extract_nested_binary_comparison_filter(expr, mappings, &column_filter) || + extract_nested_in_list_filter(expr, mappings, &column_filter) || + extract_nested_null_filter(expr, mappings, &column_filter)) { + merge_column_predicate_filter(std::move(column_filter), filters); + } +} + +} // namespace doris::format diff --git a/be/src/format_v2/column_mapper_nested.h b/be/src/format_v2/column_mapper_nested.h new file mode 100644 index 00000000000000..b8b3f1f3334a8f --- /dev/null +++ b/be/src/format_v2/column_mapper_nested.h @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "core/field.h" +#include "exprs/vexpr_fwd.h" +#include "exprs/vliteral.h" +#include "exprs/vslot_ref.h" +#include "format_v2/column_mapper.h" +#include "format_v2/file_reader.h" + +namespace doris::format { + +struct StructChildSelector { + bool by_name = true; + std::string name; + size_t ordinal = 0; +}; + +struct NestedStructPath { + GlobalIndex root_global_index; + std::vector selectors; +}; + +struct ResolvedNestedStructPath { + LocalColumnIndex file_projection; + std::vector file_child_names; + std::vector file_child_types; +}; + +// A split-local literal produced by slot-literal predicate localization. This wrapper keeps the +// original table literal so a cloned conjunct can be localized again for another split. +class SplitLocalFileLiteral final : public VLiteral { +public: + SplitLocalFileLiteral(const DataTypePtr& file_type, const Field& file_field, + DataTypePtr original_type, Field original_field); + + const DataTypePtr& original_type() const { return _original_type; } + const Field& original_field() const { return _original_field; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + Field file_field; + get_column_ptr()->get(0, file_field); + *cloned_expr = std::make_shared(_data_type, file_field, + _original_type, _original_field); + return Status::OK(); + } + +private: + DataTypePtr _original_type; + Field _original_field; +}; + +GlobalIndex slot_ref_global_index(const VSlotRef& slot_ref); +bool is_struct_element_expr(const VExprSPtr& expr); +Field literal_field(const VExprSPtr& literal_expr); + +bool resolve_nested_struct_path_for_file(const NestedStructPath& path, + const std::vector& mappings, + ResolvedNestedStructPath* resolved, + bool require_scan_projection = false); + +bool resolve_nested_struct_expr_for_file(const VExprSPtr& expr, + const std::vector& mappings, + ResolvedNestedStructPath* resolved); + +void collect_nested_struct_paths(const VExprSPtr& expr, std::vector* paths); + +std::vector present_child_mappings_in_file_order( + const std::vector& child_mappings); + +Status build_file_child_projection_from_schema(const std::vector& children, + std::span selectors, + LocalColumnIndex* projection); + +void merge_column_predicate_filter(FileColumnPredicateFilter column_filter, + std::vector* filters); + +void collect_nested_column_predicate_filters(const VExprSPtr& expr, + const std::vector& mappings, + std::vector* filters); + +} // namespace doris::format diff --git a/be/src/format_v2/delimited_text/csv_reader.cpp b/be/src/format_v2/delimited_text/csv_reader.cpp new file mode 100644 index 00000000000000..711146a9880479 --- /dev/null +++ b/be/src/format_v2/delimited_text/csv_reader.cpp @@ -0,0 +1,295 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/delimited_text/csv_reader.h" + +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type_serde/data_type_string_serde.h" +#include "format/file_reader/new_plain_binary_line_reader.h" +#include "format/file_reader/new_plain_text_line_reader.h" +#include "gen_cpp/internal_service.pb.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/decompressor.h" +#include "util/utf8_check.h" + +namespace doris::format::csv { +namespace { + +bool starts_with_at(const Slice& line, size_t pos, const std::string& needle) { + return !needle.empty() && pos + needle.size() <= line.size && + std::memcmp(line.data + pos, needle.data(), needle.size()) == 0; +} + +bool is_csv_text_format(TFileFormatType::type format_type) { + switch (format_type) { + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_CSV_GZ: + case TFileFormatType::FORMAT_CSV_BZ2: + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + case TFileFormatType::FORMAT_CSV_LZ4BLOCK: + case TFileFormatType::FORMAT_CSV_LZOP: + case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK: + case TFileFormatType::FORMAT_CSV_DEFLATE: + return true; + default: + return false; + } +} + +} // namespace + +CsvReader::CsvReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type, + std::optional stream_load_id) + : DelimitedTextReader(system_properties, file_description, std::move(io_ctx), profile, + scan_params, file_slot_descs, range_compress_type, + std::move(stream_load_id), "CSV") {} + +CsvReader::~CsvReader() = default; + +Status CsvReader::_init_format_state() { + _file_format_type = _scan_params->format_type; + _file_compress_type = + _range_compress_type != TFileCompressType::UNKNOWN + ? _range_compress_type + : (_scan_params->__isset.compress_type ? _scan_params->compress_type + : TFileCompressType::UNKNOWN); + if (_file_compress_type == TFileCompressType::UNKNOWN && + _file_format_type == TFileFormatType::FORMAT_CSV_PLAIN) { + // FORMAT_CSV_PLAIN is an uncompressed byte stream even when FE does not fill + // compress_type. Non-first splits rely on this normalization; otherwise UNKNOWN would be + // rejected by the split-compressed-file guard in the shared reader base. + _file_compress_type = TFileCompressType::PLAIN; + } + + const auto& text_params = _scan_params->file_attributes.text_params; + _value_separator = text_params.column_separator; + _line_delimiter = text_params.line_delimiter; + if (text_params.__isset.enclose) { + _enclose = text_params.enclose; + } + if (text_params.__isset.escape) { + _escape = text_params.escape; + } + _trim_tailing_spaces = _runtime_state != nullptr && + _runtime_state->trim_tailing_spaces_for_external_table_query(); + _options.escape_char = _escape; + _options.quote_char = _enclose; + _options.collection_delim = + text_params.collection_delimiter.empty() ? ',' : text_params.collection_delimiter[0]; + _options.map_key_delim = + text_params.mapkv_delimiter.empty() ? ':' : text_params.mapkv_delimiter[0]; + if (text_params.__isset.null_format) { + _options.null_format = text_params.null_format.data(); + _options.null_len = text_params.null_format.length(); + } + if (_scan_params->file_attributes.__isset.trim_double_quotes) { + _trim_double_quotes = _scan_params->file_attributes.trim_double_quotes; + } + _options.converted_from_string = _trim_double_quotes; + if (_runtime_state != nullptr) { + _keep_cr = _runtime_state->query_options().keep_carriage_return; + } + if (text_params.__isset.empty_field_as_null) { + _empty_field_as_null = text_params.empty_field_as_null; + } + return Status::OK(); +} + +Status CsvReader::_create_decompressor() { + if (_file_compress_type != TFileCompressType::UNKNOWN) { + return Decompressor::create_decompressor(_file_compress_type, &_decompressor); + } + return Decompressor::create_decompressor(_file_format_type, &_decompressor); +} + +Status CsvReader::_create_line_reader() { + if (is_csv_text_format(_file_format_type)) { + std::shared_ptr text_line_reader_ctx; + if (_enclose == 0) { + text_line_reader_ctx = std::make_shared( + _line_delimiter, _line_delimiter.size(), _keep_cr); + } else { + // The enclosed-line context finds logical records that may span physical newlines. + // Field slicing still happens in `_split_line()` because the v2 scan request may ask + // for CSV ordinals in a different order from the physical file. + const size_t col_sep_num = + _source_file_slot_descs.size() > 1 ? _source_file_slot_descs.size() - 1 : 0; + text_line_reader_ctx = std::make_shared( + _line_delimiter, _line_delimiter.size(), _value_separator, + _value_separator.size(), col_sep_num, _enclose, _escape, _keep_cr); + } + _line_reader = NewPlainTextLineReader::create_unique( + _profile, _file_reader, _decompressor.get(), std::move(text_line_reader_ctx), _size, + _start_offset); + return Status::OK(); + } + if (_file_format_type == TFileFormatType::FORMAT_PROTO) { + _line_reader = NewPlainBinaryLineReader::create_unique(_file_reader); + return Status::OK(); + } + return Status::InternalError("Unknown CSV format type {}", _file_format_type); +} + +Status CsvReader::_validate_line(const Slice& line) { + if (_file_format_type != TFileFormatType::FORMAT_PROTO && _enable_text_validate_utf8 && + !validate_utf8(line.data, line.size)) { + return Status::InternalError("Only support csv data in utf8 codec"); + } + return Status::OK(); +} + +void CsvReader::_split_line(const Slice& line) { + _split_values.clear(); + if (_file_format_type == TFileFormatType::FORMAT_PROTO) { + auto** row_ptr = reinterpret_cast(line.data); + PDataRow* row = *row_ptr; + for (const PDataColumn& col : row->col()) { + _split_values.emplace_back(col.value()); + } + return; + } + + // The text line reader is responsible for split boundaries and multi-line quoted fields. + // Field slicing still happens here because FileScannerV2 asks columns by file-local id, so we + // must be able to materialize only the requested CSV ordinals without building a row object. + // Example: for `1,"a,b",10` and column separator `,`, this loop returns three slices: + // `1`, `a,b`, and `10`; the comma inside quotes does not create an extra field. + bool in_quote = false; + bool escaped = false; + size_t start = 0; + size_t i = 0; + while (i < line.size) { + const char ch = line.data[i]; + if (_enclose != 0) { + if (escaped) { + escaped = false; + ++i; + continue; + } + if (_escape != 0 && ch == _escape) { + escaped = true; + ++i; + continue; + } + if (ch == _enclose) { + if (in_quote && i + 1 < line.size && line.data[i + 1] == _enclose) { + i += 2; + continue; + } + in_quote = !in_quote; + ++i; + continue; + } + } + if (!in_quote && starts_with_at(line, i, _value_separator)) { + size_t value_start = start; + size_t value_len = i - start; + while (_trim_tailing_spaces && value_len > 0 && + line.data[value_start + value_len - 1] == ' ') { + --value_len; + } + if (_trim_double_quotes && value_len > 1 && line.data[value_start] == '"' && + line.data[value_start + value_len - 1] == '"') { + ++value_start; + value_len -= 2; + } else if (_enclose != 0 && value_len > 1 && line.data[value_start] == _enclose && + line.data[value_start + value_len - 1] == _enclose) { + ++value_start; + value_len -= 2; + } + _split_values.emplace_back(line.data + value_start, value_len); + i += _value_separator.size(); + start = i; + continue; + } + ++i; + } + + size_t value_start = start; + size_t value_len = line.size - start; + while (_trim_tailing_spaces && value_len > 0 && line.data[value_start + value_len - 1] == ' ') { + --value_len; + } + if (_trim_double_quotes && value_len > 1 && line.data[value_start] == '"' && + line.data[value_start + value_len - 1] == '"') { + ++value_start; + value_len -= 2; + } else if (_enclose != 0 && value_len > 1 && line.data[value_start] == _enclose && + line.data[value_start + value_len - 1] == _enclose) { + ++value_start; + value_len -= 2; + } + _split_values.emplace_back(line.data + value_start, value_len); +} + +Status CsvReader::_deserialize_one_cell(const RequestedColumn& column, IColumn* output, + Slice value) { + DORIS_CHECK(output != nullptr); + if (column.nullable_string_fast_path) { + auto& null_column = assert_cast(*output); + // String is the hottest CSV type. Avoid the generic nullable serde wrapper here: + // deserialize directly into the nested string column and append the null map bit ourselves. + if (_empty_field_as_null && value.size == 0) { + null_column.insert_data(nullptr, 0); + return Status::OK(); + } + // CSV keeps empty-field handling separate from null_format matching. An empty + // null_format must not turn every empty CSV field into NULL unless FE explicitly sets + // empty_field_as_null; OpenCSV-compatible tables expect empty fields to stay empty strings. + if (_options.null_len > 0 && value.size == _options.null_len && + std::memcmp(value.data, _options.null_format, value.size) == 0) { + null_column.insert_data(nullptr, 0); + return Status::OK(); + } + static DataTypeStringSerDe string_serde(TYPE_STRING); + auto status = string_serde.deserialize_one_cell_from_csv(null_column.get_nested_column(), + value, _options); + if (!status.ok()) { + null_column.insert_data(nullptr, 0); + return Status::OK(); + } + null_column.get_null_map_data().push_back(0); + return Status::OK(); + } + return column.serde->deserialize_one_cell_from_csv(*output, value, _options); +} + +Slice CsvReader::_normalize_value(Slice value) const { + if (_empty_field_as_null && value.size == 0) { + return Slice(_options.null_format, _options.null_len); + } + return value; +} + +bool CsvReader::_can_split() const { + return (_file_compress_type == TFileCompressType::PLAIN) || + (_file_compress_type == TFileCompressType::UNKNOWN && + _file_format_type == TFileFormatType::FORMAT_CSV_PLAIN); +} + +} // namespace doris::format::csv diff --git a/be/src/format_v2/delimited_text/csv_reader.h b/be/src/format_v2/delimited_text/csv_reader.h new file mode 100644 index 00000000000000..e5d1ce25a74f40 --- /dev/null +++ b/be/src/format_v2/delimited_text/csv_reader.h @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "format_v2/delimited_text/delimited_text_reader.h" +#include "gen_cpp/PlanNodes_types.h" +#include "util/slice.h" + +namespace doris { +class SlotDescriptor; +} // namespace doris + +namespace doris::format::csv { + +// FileScannerV2 CSV reader. +// +// CSV files do not carry a physical schema. FE provides the table slot descriptors plus +// TFileScanRangeParams::column_idxs, where each file slot maps to a CSV field ordinal. This reader +// exposes that information as a v2 file-local schema and implements CSV parsing directly in the v2 +// FileReader contract. +class CsvReader final : public ::doris::format::DelimitedTextReader { +public: + // `file_slot_descs` must contain only columns physically readable from the CSV payload. + // Partition/default/virtual columns are materialized by TableReader after this reader returns + // a file-local block. Keeping that boundary is important because CSV has no embedded schema + // from which those non-file columns could be derived. + CsvReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN, + std::optional stream_load_id = std::nullopt); + ~CsvReader() override; + +private: + Status _init_format_state() override; + Status _create_decompressor() override; + Status _create_line_reader() override; + Status _validate_line(const Slice& line) override; + void _split_line(const Slice& line) override; + Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output, + Slice value) override; + Slice _normalize_value(Slice value) const override; + bool _can_split() const override; + + TFileFormatType::type _file_format_type = TFileFormatType::FORMAT_CSV_PLAIN; + char _enclose = 0; + bool _trim_double_quotes = false; + bool _trim_tailing_spaces = false; + bool _empty_field_as_null = false; + bool _keep_cr = false; +}; + +} // namespace doris::format::csv diff --git a/be/src/format_v2/delimited_text/delimited_text_reader.cpp b/be/src/format_v2/delimited_text/delimited_text_reader.cpp new file mode 100644 index 00000000000000..f6e84b4aa7750e --- /dev/null +++ b/be/src/format_v2/delimited_text/delimited_text_reader.cpp @@ -0,0 +1,644 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/delimited_text/delimited_text_reader.h" + +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "format/line_reader.h" +#include "format_v2/column_mapper.h" +#include "format_v2/materialized_reader_util.h" +#include "io/file_factory.h" +#include "io/fs/tracing_file_reader.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/decompressor.h" +#include "util/string_util.h" + +namespace doris::format { +namespace { + +constexpr const char* DELIMITED_TEXT_PROFILE = "DelimitedTextReader"; + +void update_counter(RuntimeProfile::Counter* counter, int64_t value) { + if (counter != nullptr) { + COUNTER_UPDATE(counter, value); + } +} + +DataTypePtr nullable_type(DataTypePtr type) { + return type != nullptr && type->is_nullable() ? std::move(type) + : make_nullable(std::move(type)); +} + +DataTypePtr delimited_file_type_from_slot_type(const DataTypePtr& type) { + if (type == nullptr) { + return nullptr; + } + + const bool is_nullable = type->is_nullable(); + const auto nested_type = remove_nullable(type); + DataTypePtr file_type; + switch (nested_type->get_primitive_type()) { + case TYPE_CHAR: + case TYPE_VARCHAR: + // Delimited text files do not carry CHAR/VARCHAR length metadata. FE slot types describe + // the table target type, not a bounded physical file type. Expose bounded strings as + // unbounded STRING on the file side so TableReader can later enforce the table length. + // Example: a TEXT field "hangzhou" mapped to table CHAR(3) must be read as STRING and + // truncated to "han" during table materialization. + file_type = std::make_shared(); + break; + case TYPE_ARRAY: { + const auto* array_type = assert_cast(nested_type.get()); + file_type = std::make_shared( + delimited_file_type_from_slot_type(array_type->get_nested_type())); + break; + } + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + file_type = std::make_shared( + delimited_file_type_from_slot_type(map_type->get_key_type()), + delimited_file_type_from_slot_type(map_type->get_value_type())); + break; + } + case TYPE_STRUCT: { + const auto* struct_type = assert_cast(nested_type.get()); + DataTypes file_children; + file_children.reserve(struct_type->get_elements().size()); + for (const auto& child_type : struct_type->get_elements()) { + file_children.push_back(delimited_file_type_from_slot_type(child_type)); + } + file_type = + std::make_shared(file_children, struct_type->get_element_names()); + break; + } + default: + file_type = nested_type; + break; + } + + return is_nullable ? make_nullable(file_type) : file_type; +} + +ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id); + +std::vector synthesize_file_children_from_type(const DataTypePtr& type) { + std::vector children; + if (type == nullptr) { + return children; + } + const auto nested_type = remove_nullable(type); + switch (nested_type->get_primitive_type()) { + case TYPE_ARRAY: { + const auto* array_type = assert_cast(nested_type.get()); + children.push_back(synthetic_file_child("element", array_type->get_nested_type(), 0)); + break; + } + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + children.push_back(synthetic_file_child("key", map_type->get_key_type(), 0)); + children.push_back(synthetic_file_child("value", map_type->get_value_type(), 1)); + break; + } + case TYPE_STRUCT: { + const auto* struct_type = assert_cast(nested_type.get()); + children.reserve(struct_type->get_elements().size()); + for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) { + children.push_back(synthetic_file_child(struct_type->get_element_name(idx), + struct_type->get_element(idx), + cast_set(idx))); + } + break; + } + default: + break; + } + return children; +} + +ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id) { + ColumnDefinition child; + child.identifier = Field::create_field(name); + child.local_id = local_id; + child.name = name; + child.type = std::move(type); + child.children = synthesize_file_children_from_type(child.type); + return child; +} + +} // namespace + +DelimitedTextReader::DelimitedTextReader( + std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type, std::optional stream_load_id, + std::string reader_name) + : FileReader(system_properties, file_description, std::move(io_ctx), profile), + _scan_params(scan_params), + _source_file_slot_descs(file_slot_descs), + _range_compress_type(range_compress_type), + _stream_load_id(std::move(stream_load_id)), + _reader_name(std::move(reader_name)) {} + +DelimitedTextReader::~DelimitedTextReader() { + static_cast(close()); +} + +void DelimitedTextReader::_init_profile() { + if (_profile == nullptr || _text_profile.raw_lines_read != nullptr) { + return; + } + + ADD_TIMER_WITH_LEVEL(_profile, DELIMITED_TEXT_PROFILE, 1); + _text_profile.open_file_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "OpenFileTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.create_line_reader_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "CreateLineReaderTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.read_line_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadLineTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.split_line_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "SplitLineTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.deserialize_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DeserializeTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.conjunct_filter_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ConjunctFilterTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.delete_conjunct_filter_time = ADD_CHILD_TIMER_WITH_LEVEL( + _profile, "DeleteConjunctFilterTime", DELIMITED_TEXT_PROFILE, 1); + _text_profile.raw_lines_read = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "RawLinesRead", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.rows_read_before_filter = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "RowsReadBeforeFilter", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "RowsFilteredByConjunct", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.rows_filtered_by_delete_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "RowsFilteredByDeleteConjunct", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.rows_returned = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "RowsReturned", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.empty_lines_read = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "EmptyLinesRead", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.skipped_lines = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "SkippedLines", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); + _text_profile.cells_deserialized = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "CellsDeserialized", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1); +} + +Status DelimitedTextReader::init(RuntimeState* state) { + _init_profile(); + _runtime_state = state; + if (_scan_params == nullptr) { + return Status::InvalidArgument("{} v2 reader requires scan params", _reader_name); + } + if (_file_description == nullptr) { + return Status::InvalidArgument("{} v2 reader requires file description", _reader_name); + } + if (!_scan_params->__isset.file_attributes || + !_scan_params->file_attributes.__isset.text_params) { + return Status::InvalidArgument("{} v2 reader requires text file attributes", _reader_name); + } + _enable_text_validate_utf8 = !_scan_params->file_attributes.__isset.enable_text_validate_utf8 || + _scan_params->file_attributes.enable_text_validate_utf8; + + RETURN_IF_ERROR(_init_format_state()); + + // Delimited text files have no physical column ids. FE sends `column_idxs` to describe how + // each physical file slot maps to a field ordinal in the text row. The local id exposed in the + // file schema is therefore the text-field ordinal, not the slot vector position. + _source_column_idxs.clear(); + if (_scan_params->__isset.column_idxs && !_scan_params->column_idxs.empty()) { + if (_scan_params->column_idxs.size() != _source_file_slot_descs.size()) { + return Status::InvalidArgument( + "{} v2 reader column_idxs size {} does not match file slot size {}", + _reader_name, _scan_params->column_idxs.size(), _source_file_slot_descs.size()); + } + _source_column_idxs.reserve(_scan_params->column_idxs.size()); + for (const auto column_idx : _scan_params->column_idxs) { + _source_column_idxs.push_back(column_idx); + } + } else { + _source_column_idxs.reserve(_source_file_slot_descs.size()); + for (size_t i = 0; i < _source_file_slot_descs.size(); ++i) { + _source_column_idxs.push_back(static_cast(i)); + } + } + + _source_serdes = create_data_type_serdes(_source_file_slot_descs); + _file_schema.clear(); + _file_schema.reserve(_source_file_slot_descs.size()); + for (size_t i = 0; i < _source_file_slot_descs.size(); ++i) { + const auto* slot = _source_file_slot_descs[i]; + DORIS_CHECK(slot != nullptr); + ColumnDefinition field; + field.identifier = Field::create_field(slot->col_name()); + field.local_id = _source_column_idxs[i]; + field.name = slot->col_name(); + field.type = nullable_type(delimited_file_type_from_slot_type(slot->get_data_type_ptr())); + // Delimited text stores a complex value in one top-level text field, but TableColumnMapper + // still needs semantic children to localize nested projections and predicates. Expose + // ARRAY element, MAP key/value, and STRUCT fields as file-schema children while keeping the + // top-level local id as the physical text field ordinal from column_idxs. + field.children = synthesize_file_children_from_type(field.type); + _file_schema.push_back(std::move(field)); + } + _eof = false; + return Status::OK(); +} + +Status DelimitedTextReader::get_schema(std::vector* file_schema) const { + if (file_schema == nullptr) { + return Status::InvalidArgument("{} v2 file_schema is null", _reader_name); + } + *file_schema = _file_schema; + return Status::OK(); +} + +std::unique_ptr DelimitedTextReader::create_column_mapper( + TableColumnMapperOptions options) const { + return std::make_unique(std::move(options)); +} + +Status DelimitedTextReader::open(std::shared_ptr request) { + RETURN_IF_ERROR(FileReader::open(std::move(request))); + DORIS_CHECK(_request != nullptr); + RETURN_IF_ERROR(_build_requested_columns(*_request, &_requested_columns)); + { + SCOPED_TIMER(_text_profile.open_file_time); + RETURN_IF_ERROR(_open_file()); + } + RETURN_IF_ERROR(_create_decompressor()); + { + SCOPED_TIMER(_text_profile.create_line_reader_time); + RETURN_IF_ERROR(_create_line_reader()); + } + _line_reader_eof = false; + _bom_removed = false; + _eof = false; + return Status::OK(); +} + +Status DelimitedTextReader::get_block(Block* file_block, size_t* rows, bool* eof) { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(eof != nullptr); + if (_line_reader == nullptr) { + return Status::InternalError("{} v2 reader is not open", _reader_name); + } + + const auto batch_size = _runtime_state != nullptr ? _runtime_state->batch_size() : 4096; + const auto max_block_bytes = _runtime_state != nullptr + ? _runtime_state->preferred_block_size_bytes() + : std::numeric_limits::max(); + *rows = 0; + *eof = false; + + { + auto columns_guard = file_block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); + // Delimited text readers are column-pruned but not lazy materialized: all file-local + // columns requested by TableReader are decoded before file-local conjuncts are evaluated. + while (*rows < batch_size && !_line_reader_eof && + Block::columns_byte_size(columns) < max_block_bytes) { + Slice line; + bool line_eof = false; + RETURN_IF_ERROR(_read_next_line(&line, &line_eof)); + if (line_eof) { + break; + } + RETURN_IF_ERROR(_fill_columns_from_line(line, &columns, rows)); + } + } + + const size_t rows_before_filter = *rows; + update_counter(_text_profile.rows_read_before_filter, rows_before_filter); + + MaterializedReaderFilterProfile filter_profile; + filter_profile.delete_conjunct_filter_time = _text_profile.delete_conjunct_filter_time; + filter_profile.conjunct_filter_time = _text_profile.conjunct_filter_time; + filter_profile.rows_filtered_by_delete_conjunct = + _text_profile.rows_filtered_by_delete_conjunct; + filter_profile.rows_filtered_by_conjunct = _text_profile.rows_filtered_by_conjunct; + RETURN_IF_ERROR(apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, + rows, &filter_profile)); + update_counter(_text_profile.rows_returned, *rows); + _reader_statistics.read_rows += *rows; + *eof = _line_reader_eof && *rows == 0; + _eof = *eof; + return Status::OK(); +} + +Status DelimitedTextReader::get_aggregate_result(const FileAggregateRequest& request, + FileAggregateResult* result) { + DORIS_CHECK(result != nullptr); + if (request.agg_type != TPushAggOp::type::COUNT) { + return Status::NotSupported("{} v2 reader only supports COUNT aggregate pushdown", + _reader_name); + } + if (_line_reader == nullptr) { + return Status::InternalError("{} v2 reader is not open", _reader_name); + } + + int64_t count = 0; + while (!_line_reader_eof) { + Slice line; + bool line_eof = false; + RETURN_IF_ERROR(_read_next_line(&line, &line_eof)); + if (line_eof) { + break; + } + if (line.size == 0) { + update_counter(_text_profile.empty_lines_read, 1); + if (_empty_line_as_record() || + (_runtime_state != nullptr && _runtime_state->is_read_csv_empty_line_as_null())) { + ++count; + } + continue; + } + RETURN_IF_ERROR(_validate_line(line)); + ++count; + } + result->count = count; + result->columns.clear(); + update_counter(_text_profile.rows_read_before_filter, count); + update_counter(_text_profile.rows_returned, count); + _reader_statistics.read_rows += count; + _eof = true; + return Status::OK(); +} + +Status DelimitedTextReader::close() { + if (_line_reader != nullptr) { + _line_reader->close(); + _line_reader.reset(); + } + _decompressor.reset(); + _file_reader.reset(); + _tracing_file_reader.reset(); + _requested_columns.clear(); + return Status::OK(); +} + +bool DelimitedTextReader::_is_null_format(Slice value) const { + if (value.size != _options.null_len) { + return false; + } + if (_options.null_len == 0) { + return true; + } + return std::memcmp(value.data, _options.null_format, value.size) == 0; +} + +Status DelimitedTextReader::_build_requested_columns(const FileScanRequest& request, + std::vector* columns) const { + DORIS_CHECK(columns != nullptr); + columns->clear(); + + // `request.local_positions` is keyed by FileReader schema local id. For delimited text readers + // that local id is the field ordinal from column_idxs, so reverse-map it to the source slot + // descriptor before choosing the serde. + std::vector by_position(request.local_positions.size()); + for (const auto& [file_column_id, block_position] : request.local_positions) { + const auto source_it = std::find(_source_column_idxs.begin(), _source_column_idxs.end(), + file_column_id.value()); + if (source_it == _source_column_idxs.end()) { + return Status::InvalidArgument("{} v2 request references unknown local column id {}", + _reader_name, file_column_id.value()); + } + const auto source_index = std::distance(_source_column_idxs.begin(), source_it); + DORIS_CHECK(source_index >= 0 && + static_cast(source_index) < _source_file_slot_descs.size()); + if (block_position.value() >= by_position.size()) { + return Status::InvalidArgument("{} v2 request has invalid block position {}", + _reader_name, block_position.value()); + } + const auto* slot = _source_file_slot_descs[source_index]; + const auto type = slot->get_data_type_ptr(); + RequestedColumn requested_column; + requested_column.file_column_id = file_column_id; + requested_column.block_position = block_position; + requested_column.slot_desc = slot; + requested_column.serde = _source_serdes[source_index]; + requested_column.nullable_string_fast_path = + type->is_nullable() && is_string_type(type->get_primitive_type()); + by_position[block_position.value()] = std::move(requested_column); + } + + for (size_t i = 0; i < by_position.size(); ++i) { + if (!by_position[i].file_column_id.is_valid()) { + return Status::InvalidArgument("{} v2 request misses block position {}", _reader_name, + i); + } + } + *columns = std::move(by_position); + return Status::OK(); +} + +Status DelimitedTextReader::_open_file() { + _start_offset = _file_description->range_start_offset; + _size = _file_description->range_size; + // Some callers, especially stream-load/http_stream, do not know the total length up front. + // For a first split this is fine: NewPlainTextLineReader can read until the underlying reader + // returns EOF. For non-first splits we still need a concrete range so the pre-read/skip-one-line + // boundary logic does not read an unbounded stream. + if (_size <= 0 && _file_description->file_size >= 0) { + _size = _file_description->file_size - _start_offset; + } + if (_size < 0 && _start_offset > 0) { + return Status::InvalidArgument("{} v2 reader requires a valid split size for {}", + _reader_name, _file_description->path); + } + _skip_lines = 0; + if (_start_offset == 0) { + if (_scan_params->file_attributes.__isset.header_type && + !_scan_params->file_attributes.header_type.empty()) { + const auto header_type = to_lower(_scan_params->file_attributes.header_type); + if (header_type == BeConsts::CSV_WITH_NAMES) { + _skip_lines = 1; + } else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) { + _skip_lines = 2; + } + } else if (_scan_params->file_attributes.__isset.skip_lines) { + _skip_lines = _scan_params->file_attributes.skip_lines; + } + } else { + if (!_can_split()) { + return Status::InternalError("For now we do not support split compressed file"); + } + // Non-first splits normally start in the middle of a record. Pre-read at most one line + // delimiter byte range, then skip one line in `_read_next_line()`, so the first returned + // row is always complete. Example with '\n': + // file bytes: "1,a\n2,b\n" + // split start: ^ + // pre-read: ^ + // skipped line: "a" + // returned row: "2,b" + const int64_t pre_read_len = + std::min(static_cast(_line_delimiter.size()), _start_offset); + _start_offset -= pre_read_len; + _size += pre_read_len; + _skip_lines = 1; + } + + if (_scan_params->file_type == TFileType::FILE_STREAM) { + if (!_stream_load_id.has_value()) { + return Status::InvalidArgument("{} v2 stream reader requires load id", _reader_name); + } + // Stream load/http_stream data lives in NewLoadStreamMgr rather than a filesystem. The + // generic FileFactory path only supports real file systems, so FILE_STREAM must use the + // same pipe-reader lookup as the old CSV reader. + RETURN_IF_ERROR(FileFactory::create_pipe_reader(*_stream_load_id, &_file_reader, + _runtime_state, /*need_schema=*/false)); + } else { + auto reader_options = FileFactory::get_reader_options(_runtime_state->query_options(), + *_file_description); + auto file_reader = DORIS_TRY(FileFactory::create_file_reader( + *_system_properties, *_file_description, reader_options, _profile)); + _file_reader = _io_ctx && _io_ctx->file_reader_stats + ? std::make_shared(std::move(file_reader), + _io_ctx->file_reader_stats) + : file_reader; + } + if (_file_reader->size() == 0 && _scan_params->file_type != TFileType::FILE_STREAM && + _scan_params->file_type != TFileType::FILE_BROKER) { + return Status::EndOfFile("init reader failed, empty {} file: {}", _reader_name, + _file_description->path); + } + return Status::OK(); +} + +Status DelimitedTextReader::_read_next_line(Slice* line, bool* eof) { + DORIS_CHECK(line != nullptr); + DORIS_CHECK(eof != nullptr); + while (true) { + const uint8_t* ptr = nullptr; + size_t size = 0; + { + SCOPED_TIMER(_text_profile.read_line_time); + RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx.get())); + } + if (_line_reader_eof && size == 0) { + *eof = true; + return Status::OK(); + } + if (_skip_lines == 0 && !_bom_removed) { + // BOM is stripped only from the first logical data line. Header lines are skipped + // before this branch, so a BOM inside a skipped header does not leak into user data. + ptr = _remove_bom(ptr, &size); + _bom_removed = true; + } + if (_skip_lines > 0) { + --_skip_lines; + _bom_removed = true; + update_counter(_text_profile.skipped_lines, 1); + continue; + } + *line = Slice(ptr, size); + *eof = false; + update_counter(_text_profile.raw_lines_read, 1); + return Status::OK(); + } +} + +Status DelimitedTextReader::_fill_columns_from_line(const Slice& line, + std::vector* columns, + size_t* rows) { + DORIS_CHECK(columns != nullptr); + if (line.size == 0) { + update_counter(_text_profile.empty_lines_read, 1); + if (!_empty_line_as_record()) { + if (_runtime_state != nullptr && _runtime_state->is_read_csv_empty_line_as_null()) { + for (const auto& column : _requested_columns) { + RETURN_IF_ERROR(_append_null((*columns)[column.block_position.value()].get())); + update_counter(_text_profile.cells_deserialized, 1); + } + ++(*rows); + } + return Status::OK(); + } + } + RETURN_IF_ERROR(_validate_line(line)); + + { + SCOPED_TIMER(_text_profile.split_line_time); + _split_line(line); + } + SCOPED_TIMER(_text_profile.deserialize_time); + for (const auto& column : _requested_columns) { + auto* output = (*columns)[column.block_position.value()].get(); + const int32_t field_index = column.file_column_id.value(); + // Missing trailing fields are query-compatible with the old readers: they become NULL + // rather than shifting subsequent projected columns or rejecting the row. + Slice value = field_index >= 0 && static_cast(field_index) < _split_values.size() + ? _split_values[field_index] + : Slice(_options.null_format, _options.null_len); + RETURN_IF_ERROR(_deserialize_one_cell(column, output, _normalize_value(value))); + update_counter(_text_profile.cells_deserialized, 1); + } + ++(*rows); + return Status::OK(); +} + +Status DelimitedTextReader::_validate_line(const Slice& line) { + (void)line; + return Status::OK(); +} + +Slice DelimitedTextReader::_normalize_value(Slice value) const { + return value; +} + +bool DelimitedTextReader::_empty_line_as_record() const { + return false; +} + +bool DelimitedTextReader::_can_split() const { + return _file_compress_type == TFileCompressType::PLAIN; +} + +Status DelimitedTextReader::_append_null(IColumn* output) { + DORIS_CHECK(output != nullptr); + auto* nullable = assert_cast(output); + nullable->insert_data(nullptr, 0); + return Status::OK(); +} + +const uint8_t* DelimitedTextReader::_remove_bom(const uint8_t* ptr, size_t* size) { + DORIS_CHECK(size != nullptr); + if (ptr != nullptr && *size >= 3 && static_cast(ptr[0]) == 0xEF && + static_cast(ptr[1]) == 0xBB && static_cast(ptr[2]) == 0xBF) { + *size -= 3; + return ptr + 3; + } + return ptr; +} + +} // namespace doris::format diff --git a/be/src/format_v2/delimited_text/delimited_text_reader.h b/be/src/format_v2/delimited_text/delimited_text_reader.h new file mode 100644 index 00000000000000..06cb93dd7f7b65 --- /dev/null +++ b/be/src/format_v2/delimited_text/delimited_text_reader.h @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "core/data_type_serde/data_type_serde.h" +#include "format_v2/file_reader.h" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/runtime_profile.h" +#include "util/slice.h" + +namespace doris { +class Decompressor; +class LineReader; +class SlotDescriptor; +} // namespace doris + +namespace doris::format { + +// Shared FileReader implementation for delimited text-like formats in FileScannerV2. +// +// CSV and Hive text have different row parsing and cell serde rules, but their v2 FileReader +// control flow is the same: expose a file-local schema from FE slot descriptors, resolve +// FileScanRequest local positions, read physical lines, materialize requested columns, apply +// file-local conjuncts, and optionally count rows by scanning. This base keeps that contract in one +// place while derived readers provide only format-specific hooks. +class DelimitedTextReader : public FileReader { +public: + ~DelimitedTextReader() override; + + Status init(RuntimeState* state) override; + Status get_schema(std::vector* file_schema) const override; + std::unique_ptr create_column_mapper( + TableColumnMapperOptions options) const override; + Status open(std::shared_ptr request) override; + Status get_block(Block* file_block, size_t* rows, bool* eof) override; + Status get_aggregate_result(const FileAggregateRequest& request, + FileAggregateResult* result) override; + Status close() override; + +protected: + struct DelimitedTextProfile { + RuntimeProfile::Counter* open_file_time = nullptr; + RuntimeProfile::Counter* create_line_reader_time = nullptr; + RuntimeProfile::Counter* read_line_time = nullptr; + RuntimeProfile::Counter* split_line_time = nullptr; + RuntimeProfile::Counter* deserialize_time = nullptr; + RuntimeProfile::Counter* conjunct_filter_time = nullptr; + RuntimeProfile::Counter* delete_conjunct_filter_time = nullptr; + RuntimeProfile::Counter* raw_lines_read = nullptr; + RuntimeProfile::Counter* rows_read_before_filter = nullptr; + RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr; + RuntimeProfile::Counter* rows_filtered_by_delete_conjunct = nullptr; + RuntimeProfile::Counter* rows_returned = nullptr; + RuntimeProfile::Counter* empty_lines_read = nullptr; + RuntimeProfile::Counter* skipped_lines = nullptr; + RuntimeProfile::Counter* cells_deserialized = nullptr; + }; + + struct RequestedColumn { + LocalColumnId file_column_id = LocalColumnId::invalid(); + LocalIndex block_position; + const SlotDescriptor* slot_desc = nullptr; + DataTypeSerDeSPtr serde; + bool nullable_string_fast_path = false; + }; + + DelimitedTextReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type, + std::optional stream_load_id, std::string reader_name); + + // Initialize format-specific options after the common init path has validated scan params and + // runtime state. Implementations must fill `_value_separator`, `_line_delimiter`, + // `_file_compress_type`, `_options`, and any parser-specific state before the common schema + // construction reads column_idxs. + virtual Status _init_format_state() = 0; + // Create the decompressor used by the line reader. CSV may infer compression from the file + // format enum, while Hive text uses only the explicit compress_type. + virtual Status _create_decompressor() = 0; + // Create the physical line reader. Implementations choose plain/enclosed/binary line contexts, + // but must store the result in `_line_reader` for the common get_block/count paths. + virtual Status _create_line_reader() = 0; + // Validate one logical line before splitting. CSV enforces UTF-8 for query reads; Hive text + // deliberately accepts arbitrary bytes and uses the default OK implementation. + virtual Status _validate_line(const Slice& line); + // Split one logical line into `_split_values`. The common materialization path then resolves + // requested field ordinals against `_split_values`. + virtual void _split_line(const Slice& line) = 0; + // Deserialize a single normalized field into the requested output column using the + // format-specific serde API. + virtual Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output, + Slice value) = 0; + // Let formats rewrite a raw field before serde. CSV uses this for empty_field_as_null; Hive + // text keeps the raw field because empty string and NULL are distinct unless null_format + // matches exactly. + virtual Slice _normalize_value(Slice value) const; + // Whether an empty physical line is one logical record. CSV keeps the existing default + // skip behavior, while Hive TEXTFILE treats an empty line as a record with one empty field. + virtual bool _empty_line_as_record() const; + // Whether this file can start at a non-zero split offset. Compressed delimited files cannot be + // split because the decompressor needs the stream from the beginning. + virtual bool _can_split() const; + + Status _append_null(IColumn* output); + // Match the generic nullable serde semantics exactly: a field is NULL when its raw slice is + // byte-for-byte equal to null_format. This also covers Hive tables that set + // serialization.null.format to the empty string. + bool _is_null_format(Slice value) const; + const uint8_t* _remove_bom(const uint8_t* ptr, size_t* size); + void _init_profile() override; + + const TFileScanRangeParams* _scan_params = nullptr; + std::vector _source_file_slot_descs; + std::vector _source_column_idxs; + DataTypeSerDeSPtrs _source_serdes; + std::vector _file_schema; + RuntimeState* _runtime_state = nullptr; + + std::vector _requested_columns; + std::unique_ptr _decompressor; + std::unique_ptr _line_reader; + std::vector _split_values; + DataTypeSerDe::FormatOptions _options; + + std::string _value_separator; + std::string _line_delimiter; + TFileCompressType::type _file_compress_type = TFileCompressType::UNKNOWN; + TFileCompressType::type _range_compress_type = TFileCompressType::UNKNOWN; + std::optional _stream_load_id; + int64_t _start_offset = 0; + int64_t _size = -1; + int _skip_lines = 0; + char _escape = 0; + bool _line_reader_eof = false; + bool _bom_removed = false; + // FE exposes this as an optional text-file attribute. Keep the default strict so missing thrift + // fields do not accidentally accept arbitrary bytes; CSV can still opt out through the session + // variable or TVF/file-format property `enable_text_validate_utf8=false`. + bool _enable_text_validate_utf8 = true; + DelimitedTextProfile _text_profile; + +private: + Status _build_requested_columns(const FileScanRequest& request, + std::vector* columns) const; + Status _open_file(); + Status _read_next_line(Slice* line, bool* eof); + Status _fill_columns_from_line(const Slice& line, std::vector* columns, + size_t* rows); + + std::string _reader_name; +}; + +} // namespace doris::format diff --git a/be/src/format_v2/delimited_text/text_reader.cpp b/be/src/format_v2/delimited_text/text_reader.cpp new file mode 100644 index 00000000000000..930052a14f1229 --- /dev/null +++ b/be/src/format_v2/delimited_text/text_reader.cpp @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/delimited_text/text_reader.h" + +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type_serde/data_type_string_serde.h" +#include "format/file_reader/new_plain_text_line_reader.h" +#include "runtime/descriptors.h" +#include "util/decompressor.h" + +namespace doris::format::text { +namespace { + +bool starts_with_at(const Slice& line, size_t pos, const std::string& needle) { + return !needle.empty() && pos + needle.size() <= line.size && + std::memcmp(line.data + pos, needle.data(), needle.size()) == 0; +} + +} // namespace + +TextReader::TextReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type, + std::optional stream_load_id) + : DelimitedTextReader(system_properties, file_description, std::move(io_ctx), profile, + scan_params, file_slot_descs, range_compress_type, + std::move(stream_load_id), "Text") {} + +TextReader::~TextReader() = default; + +Status TextReader::_init_format_state() { + _file_compress_type = + _range_compress_type != TFileCompressType::UNKNOWN + ? _range_compress_type + : (_scan_params->__isset.compress_type ? _scan_params->compress_type + : TFileCompressType::PLAIN); + + const auto& text_params = _scan_params->file_attributes.text_params; + _value_separator = text_params.column_separator; + _line_delimiter = text_params.line_delimiter; + if (text_params.__isset.escape) { + _escape = text_params.escape; + } + _options.escape_char = _escape; + _options.collection_delim = + text_params.collection_delimiter.empty() ? ',' : text_params.collection_delimiter[0]; + _options.map_key_delim = + text_params.mapkv_delimiter.empty() ? ':' : text_params.mapkv_delimiter[0]; + if (text_params.__isset.null_format) { + _options.null_format = text_params.null_format.data(); + _options.null_len = text_params.null_format.length(); + } + return Status::OK(); +} + +Status TextReader::_create_decompressor() { + return Decompressor::create_decompressor(_file_compress_type, &_decompressor); +} + +Status TextReader::_create_line_reader() { + auto text_line_reader_ctx = std::make_shared( + _line_delimiter, _line_delimiter.size(), false); + _line_reader = NewPlainTextLineReader::create_unique( + _profile, _file_reader, _decompressor.get(), std::move(text_line_reader_ctx), _size, + _start_offset); + return Status::OK(); +} + +void TextReader::_split_line(const Slice& line) { + _split_values.clear(); + if (_value_separator.size() == 1) { + _split_line_single_char(line); + } else { + _split_line_multi_char(line); + } +} + +void TextReader::_split_line_single_char(const Slice& line) { + size_t value_start = 0; + for (size_t i = 0; i < line.size; ++i) { + if (line.data[i] == _value_separator[0]) { + // Hive text lets a string escape the field separator. The backslash remains in the + // field slice so deserialize_one_cell_from_hive_text() can unescape the final value. + if (_escape != 0 && i > 0 && line.data[i - 1] == _escape) { + continue; + } + _split_values.emplace_back(line.data + value_start, i - value_start); + value_start = i + _value_separator.size(); + } + } + _split_values.emplace_back(line.data + value_start, line.size - value_start); +} + +void TextReader::_split_line_multi_char(const Slice& line) { + size_t value_start = 0; + size_t i = 0; + while (i < line.size) { + if (starts_with_at(line, i, _value_separator)) { + if (_escape != 0 && i > 0 && line.data[i - 1] == _escape) { + ++i; + continue; + } + _split_values.emplace_back(line.data + value_start, i - value_start); + i += _value_separator.size(); + value_start = i; + continue; + } + ++i; + } + _split_values.emplace_back(line.data + value_start, line.size - value_start); +} + +Status TextReader::_deserialize_one_cell(const RequestedColumn& column, IColumn* output, + Slice value) { + DORIS_CHECK(output != nullptr); + if (column.nullable_string_fast_path) { + auto& null_column = assert_cast(*output); + if (_is_null_format(value)) { + null_column.insert_data(nullptr, 0); + return Status::OK(); + } + static DataTypeStringSerDe string_serde(TYPE_STRING); + auto status = string_serde.deserialize_one_cell_from_hive_text( + null_column.get_nested_column(), value, _options); + if (!status.ok()) { + null_column.insert_data(nullptr, 0); + return Status::OK(); + } + null_column.get_null_map_data().push_back(0); + return Status::OK(); + } + return column.serde->deserialize_one_cell_from_hive_text(*output, value, _options); +} + +bool TextReader::_empty_line_as_record() const { + // Hive TEXTFILE treats an empty physical line as a record. The splitter maps it + // to one empty field and missing trailing fields are filled with null_format. + return true; +} + +} // namespace doris::format::text diff --git a/be/src/format_v2/delimited_text/text_reader.h b/be/src/format_v2/delimited_text/text_reader.h new file mode 100644 index 00000000000000..8efbfe359c7e64 --- /dev/null +++ b/be/src/format_v2/delimited_text/text_reader.h @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "format_v2/delimited_text/delimited_text_reader.h" +#include "gen_cpp/PlanNodes_types.h" +#include "util/slice.h" + +namespace doris { +class SlotDescriptor; +} // namespace doris + +namespace doris::format::text { + +// FileScannerV2 Hive text reader. +// +// Text files do not have embedded schema, so FE-provided file slots and column_idxs are converted +// into a file-local schema in the same way as CSV v2. The row parser is intentionally different +// from CSV: field splitting follows Hive text escaping rules and cells are deserialized through +// deserialize_one_cell_from_hive_text(). +class TextReader final : public ::doris::format::DelimitedTextReader { +public: + TextReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN, + std::optional stream_load_id = std::nullopt); + ~TextReader() override; + +private: + Status _init_format_state() override; + Status _create_decompressor() override; + Status _create_line_reader() override; + void _split_line(const Slice& line) override; + void _split_line_single_char(const Slice& line); + void _split_line_multi_char(const Slice& line); + Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output, + Slice value) override; + bool _empty_line_as_record() const override; +}; + +} // namespace doris::format::text diff --git a/be/src/format_v2/expr/cast.cpp b/be/src/format_v2/expr/cast.cpp new file mode 100644 index 00000000000000..efeb9d851deb22 --- /dev/null +++ b/be/src/format_v2/expr/cast.cpp @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/expr/cast.h" + +#include +#include +#include + +#include + +#include "common/status.h" +#include "core/block/block.h" +#include "core/block/column_with_type_and_name.h" +#include "core/block/columns_with_type_and_name.h" +#include "exprs/function/simple_function_factory.h" +#include "exprs/vexpr_context.h" +#include "exprs/vliteral.h" + +namespace doris::format { + +Status Cast::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { + RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); + if (_children.size() != 1) { + return Status::InternalError( + fmt::format("Cast should have exactly 1 child expr, but got {}", _children.size())); + } + ColumnsWithTypeAndName argument_template; + argument_template.reserve(_children.size()); + if (_children[0]->is_literal()) { + // For some functions, he needs some literal columns to derive the return type. + auto literal_node = std::dynamic_pointer_cast(_children[0]); + argument_template.emplace_back(literal_node->get_column_ptr(), _children[0]->data_type(), + _children[0]->expr_name()); + } else { + argument_template.emplace_back(nullptr, _children[0]->data_type(), + _children[0]->expr_name()); + } + + _expr_name = fmt::format("CAST(arguments={},return={})", _children[0]->data_type()->get_name(), + _data_type->get_name()); + // get the function. won't prepare function. + _function = SimpleFunctionFactory::instance().get_function( + "CAST", argument_template, _data_type, + {.new_version_unix_timestamp = state->query_options().new_version_unix_timestamp}, + state->be_exec_version()); + if (_function == nullptr) { + return Status::InternalError("Could not find function {} ", _expr_name); + } + VExpr::register_function_context(state, context); + _prepare_finished = true; + return Status::OK(); +} + +Status Cast::open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) { + DCHECK(_prepare_finished); + for (auto& i : _children) { + RETURN_IF_ERROR(i->open(state, context, scope)); + } + RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function)); + if (scope == FunctionContext::FRAGMENT_LOCAL) { + RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr)); + } + _open_finished = true; + return Status::OK(); +} + +void Cast::close(VExprContext* context, FunctionContext::FunctionStateScope scope) { + VExpr::close_function_context(context, scope, _function); + VExpr::close(context, scope); +} + +Status Cast::execute_column_impl(VExprContext* context, const Block* block, + const Selector* selector, size_t count, + ColumnPtr& result_column) const { + return _do_execute(context, block, selector, count, result_column); +} + +std::string Cast::debug_string() const { + return _expr_name; +} + +Status Cast::_do_execute(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const { + DCHECK(_open_finished || block == nullptr) << debug_string(); + if (_children.size() != 1) { + return Status::InternalError( + fmt::format("Cast should have exactly 1 child expr, but got {}", _children.size())); + } + if (is_const_and_have_executed()) { // const have executed in open function + result_column = get_result_from_const(count); + return Status::OK(); + } + + Block temp_block; + ColumnNumbers args(1); + + ColumnPtr tmp_arg_column; + RETURN_IF_ERROR(_children[0]->execute_column(context, block, selector, count, tmp_arg_column)); + auto arg_type = _children[0]->execute_type(block); + temp_block.insert({tmp_arg_column, arg_type, _children[0]->expr_name()}); + args[0] = 0; + + uint32_t num_columns_without_result = temp_block.columns(); + // prepare a column to save result + temp_block.insert({nullptr, _data_type, _expr_name}); + + RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), temp_block, args, + num_columns_without_result, count)); + result_column = temp_block.get_by_position(num_columns_without_result).column; + DCHECK_EQ(result_column->size(), count); + RETURN_IF_ERROR(result_column->column_self_check()); + return Status::OK(); +} + +} // namespace doris::format diff --git a/be/src/format_v2/expr/cast.h b/be/src/format_v2/expr/cast.h new file mode 100644 index 00000000000000..1dc06bcf07f2bc --- /dev/null +++ b/be/src/format_v2/expr/cast.h @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/object_pool.h" +#include "common/status.h" +#include "exprs/function_context.h" +#include "exprs/vexpr.h" + +namespace doris { +class RowDescriptor; +class RuntimeState; +class TExprNode; +class Block; +class VExprContext; +} // namespace doris + +namespace doris::format { + +class Cast final : public VExpr { + ENABLE_FACTORY_CREATOR(Cast); + +public: + Cast(const DataTypePtr& type) { + _node_type = TExprNodeType::CAST_EXPR; + _opcode = TExprOpcode::CAST; + _data_type = type; + } + ~Cast() override = default; + Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; + Status open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) override; + void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override; + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override; + std::string debug_string() const override; + uint64_t get_digest(uint64_t seed) const override { return 0; } + const std::string& expr_name() const override { return _expr_name; } + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = Cast::create_shared(_data_type); + return Status::OK(); + } + +private: + Status _do_execute(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const; + std::string _expr_name; + FunctionBasePtr _function; +}; +} // namespace doris::format diff --git a/be/src/format_v2/expr/delete_predicate.cpp b/be/src/format_v2/expr/delete_predicate.cpp new file mode 100644 index 00000000000000..9ab1090247c15a --- /dev/null +++ b/be/src/format_v2/expr/delete_predicate.cpp @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/expr/delete_predicate.h" + +#include +#include +#include + +#include +#include +#include + +#include "common/status.h" +#include "core/block/block.h" +#include "core/block/column_numbers.h" +#include "core/block/column_with_type_and_name.h" +#include "core/block/columns_with_type_and_name.h" + +namespace doris::format { + +DeletePredicate::DeletePredicate(const std::vector& deleted_rows) + : VExpr(), _deleted_rows(deleted_rows) { + _node_type = TExprNodeType::PREDICATE; + _opcode = TExprOpcode::DELETE; + _data_type = std::make_shared(); +} + +Status DeletePredicate::prepare(RuntimeState* state, const RowDescriptor& desc, + VExprContext* context) { + RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); + _expr_name = "DeletePredicate"; + _prepare_finished = true; + return Status::OK(); +} + +Status DeletePredicate::open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) { + DCHECK(_prepare_finished); + RETURN_IF_ERROR_OR_PREPARED(VExpr::open(state, context, scope)); + _open_finished = true; + return Status::OK(); +} + +void DeletePredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) { + VExpr::close(context, scope); +} + +/** + * DeletePredicate is derived from 2 cases: + * 1. All row IDs indicates deleted rows. (e.g. Delete rows with row_id in (1, 2, 3)) + * 2. Bit vector indicates whether each row is deleted or not. (e.g. Bit vector[0,1,0,0,1] indicates row 1 and row 4 are deleted) + * + * So DeletePredicate should have exactly 1 child expr, which is the slot of row id. + * Row IDs should be generated by file reader as a virtual column in `block`. + **/ +Status DeletePredicate::execute(VExprContext* context, Block* block, int* result_column_id) const { + if (_children.size() != 1) { + return Status::InternalError(fmt::format( + "DeletePredicate should have exactly 1 child expr, but got {}", _children.size())); + } + int slot = -1; + RETURN_IF_ERROR(_children[0]->execute(context, block, &slot)); + if (slot < 0 || static_cast(slot) >= block->columns()) { + return Status::InternalError( + "DeletePredicate row id child returned invalid column id {}, block has {} columns", + slot, block->columns()); + } + const auto& row_ids = + assert_cast(*block->get_by_position(slot).column).get_data(); + const auto count = row_ids.size(); + auto res_col = ColumnBool::create(count, 0); + if (_deleted_rows.empty()) { + block->insert({std::move(res_col), std::make_shared(), expr_name()}); + *result_column_id = static_cast(block->get_columns().size() - 1); + return Status::OK(); + } + if (count == 0) { + block->insert({std::move(res_col), std::make_shared(), expr_name()}); + *result_column_id = static_cast(block->get_columns().size() - 1); + return Status::OK(); + } + const int64_t* delete_rows = _deleted_rows.data(); + const int64_t* delete_rows_end = delete_rows + _deleted_rows.size(); + const int64_t* start_pos = std::lower_bound(delete_rows, delete_rows_end, row_ids[0]); + int64_t start_index = start_pos - delete_rows; + const int64_t* end_pos = std::upper_bound(start_pos, delete_rows_end, row_ids[count - 1]); + const int64_t end_index = end_pos - delete_rows; + + while (start_index < end_index) { + int64_t delete_row = delete_rows[start_index]; + if (const auto it = std::ranges::lower_bound(row_ids, delete_row); + it != row_ids.end() && *it == delete_row) { + const size_t index = it - row_ids.begin(); + res_col->get_data()[index] = true; + } + ++start_index; + } + block->insert({std::move(res_col), std::make_shared(), expr_name()}); + *result_column_id = static_cast(block->get_columns().size() - 1); + return Status::OK(); +} + +std::string DeletePredicate::debug_string() const { + return _expr_name; +} + +} // namespace doris::format diff --git a/be/src/format_v2/expr/delete_predicate.h b/be/src/format_v2/expr/delete_predicate.h new file mode 100644 index 00000000000000..dce2de3edf278e --- /dev/null +++ b/be/src/format_v2/expr/delete_predicate.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/object_pool.h" +#include "common/status.h" +#include "exprs/function_context.h" +#include "exprs/vexpr.h" + +namespace doris { +class RowDescriptor; +class RuntimeState; +class TExprNode; +class Block; +class VExprContext; +} // namespace doris + +namespace doris::format { + +class DeletePredicate final : public VExpr { + ENABLE_FACTORY_CREATOR(DeletePredicate); + +public: + DeletePredicate(const std::vector& deleted_rows); + ~DeletePredicate() override = default; + Status execute(VExprContext* context, Block* block, int* result_column_id) const override; + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + return Status::InternalError("Not implement DeletePredicate::execute_column_impl"); + } + Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; + Status open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) override; + void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override; + std::string debug_string() const override; + uint64_t get_digest(uint64_t seed) const override { return 0; } + const std::string& expr_name() const override { return _expr_name; } + +private: + std::string _expr_name; + const std::vector& _deleted_rows; +}; +} // namespace doris::format diff --git a/be/src/format_v2/expr/equality_delete_predicate.cpp b/be/src/format_v2/expr/equality_delete_predicate.cpp new file mode 100644 index 00000000000000..13454e3b22f116 --- /dev/null +++ b/be/src/format_v2/expr/equality_delete_predicate.cpp @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/expr/equality_delete_predicate.h" + +#include + +#include + +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/block/column_with_type_and_name.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" + +namespace doris::format { +namespace { + +bool column_value_equal(const ColumnPtr& lhs, size_t lhs_row, const ColumnPtr& rhs, + size_t rhs_row) { + if (lhs->is_nullable() && rhs->is_nullable()) { + return lhs->compare_at(lhs_row, rhs_row, *rhs, -1) == 0; + } + if (lhs->is_nullable()) { + const auto& nullable_lhs = assert_cast(*lhs); + return !nullable_lhs.is_null_at(lhs_row) && + nullable_lhs.get_nested_column().compare_at(lhs_row, rhs_row, *rhs, -1) == 0; + } + if (rhs->is_nullable()) { + const auto& nullable_rhs = assert_cast(*rhs); + return !nullable_rhs.is_null_at(rhs_row) && + lhs->compare_at(lhs_row, rhs_row, nullable_rhs.get_nested_column(), -1) == 0; + } + return lhs->compare_at(lhs_row, rhs_row, *rhs, -1) == 0; +} + +} // namespace + +EqualityDeletePredicate::EqualityDeletePredicate(Block delete_block, std::vector field_ids) + : VExpr(), _delete_block(std::move(delete_block)), _field_ids(std::move(field_ids)) { + _node_type = TExprNodeType::PREDICATE; + _opcode = TExprOpcode::DELETE; + _data_type = std::make_shared(); + _expr_name = "EqualityDeletePredicate"; + DCHECK_EQ(_delete_block.columns(), _field_ids.size()); + _delete_hashes = _build_hashes(_delete_block); + for (size_t row = 0; row < _delete_hashes.size(); ++row) { + _delete_hash_map.emplace(_delete_hashes[row], row); + } +} + +Status EqualityDeletePredicate::prepare(RuntimeState* state, const RowDescriptor& desc, + VExprContext* context) { + RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); + _expr_name = "EqualityDeletePredicate"; + _prepare_finished = true; + return Status::OK(); +} + +Status EqualityDeletePredicate::open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) { + DCHECK(_prepare_finished); + for (auto& child : _children) { + RETURN_IF_ERROR(child->open(state, context, scope)); + } + if (scope == FunctionContext::FRAGMENT_LOCAL) { + RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr)); + } + _open_finished = true; + return Status::OK(); +} + +void EqualityDeletePredicate::close(VExprContext* context, + FunctionContext::FunctionStateScope scope) { + VExpr::close(context, scope); +} + +Status EqualityDeletePredicate::execute(VExprContext* context, Block* block, + int* result_column_id) const { + if (_children.size() != _field_ids.size()) { + return Status::InternalError( + "EqualityDeletePredicate should have {} child exprs, but got {}", _field_ids.size(), + _children.size()); + } + + Block data_key_block; + for (const auto& child : _children) { + Block eval_block = *block; + int slot = -1; + RETURN_IF_ERROR(child->execute(context, &eval_block, &slot)); + const auto& key_column = eval_block.get_by_position(slot); + data_key_block.insert({key_column.column, key_column.type, key_column.name}); + } + + const auto rows = data_key_block.rows(); + auto res_col = ColumnBool::create(rows, 0); + if (_delete_hash_map.empty() || rows == 0) { + block->insert({std::move(res_col), std::make_shared(), expr_name()}); + *result_column_id = static_cast(block->columns() - 1); + return Status::OK(); + } + + auto data_hashes = _build_hashes(data_key_block); + auto& result_data = res_col->get_data(); + for (size_t row = 0; row < rows; ++row) { + const auto range = _delete_hash_map.equal_range(data_hashes[row]); + for (auto it = range.first; it != range.second; ++it) { + if (_equal(data_key_block, row, it->second)) { + result_data[row] = true; + break; + } + } + } + + block->insert({std::move(res_col), std::make_shared(), expr_name()}); + *result_column_id = static_cast(block->columns() - 1); + return Status::OK(); +} + +std::vector EqualityDeletePredicate::_build_hashes(const Block& block) { + std::vector hashes(block.rows(), 0); + for (const auto& column : block.get_columns()) { + column->update_hashes_with_value(hashes.data(), nullptr); + } + return hashes; +} + +bool EqualityDeletePredicate::_equal(const Block& data_block, size_t data_row, + size_t delete_row) const { + for (size_t column_idx = 0; column_idx < _delete_block.columns(); ++column_idx) { + const auto& data_column = data_block.get_by_position(column_idx).column; + const auto& delete_column = _delete_block.get_by_position(column_idx).column; + if (!column_value_equal(data_column, data_row, delete_column, delete_row)) { + return false; + } + } + return true; +} + +std::string EqualityDeletePredicate::debug_string() const { + return _expr_name; +} + +} // namespace doris::format diff --git a/be/src/format_v2/expr/equality_delete_predicate.h b/be/src/format_v2/expr/equality_delete_predicate.h new file mode 100644 index 00000000000000..cad16ca387ccd8 --- /dev/null +++ b/be/src/format_v2/expr/equality_delete_predicate.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/block/block.h" +#include "exprs/function_context.h" +#include "exprs/vexpr.h" + +namespace doris { +class RowDescriptor; +class RuntimeState; +class VExprContext; +} // namespace doris + +namespace doris::format { + +class EqualityDeletePredicate final : public VExpr { + ENABLE_FACTORY_CREATOR(EqualityDeletePredicate); + +public: + EqualityDeletePredicate(Block delete_block, std::vector field_ids); + ~EqualityDeletePredicate() override = default; + + Status execute(VExprContext* context, Block* block, int* result_column_id) const override; + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + return Status::InternalError("Not implement EqualityDeletePredicate::execute_column_impl"); + } + Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; + Status open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) override; + void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override; + std::string debug_string() const override; + uint64_t get_digest(uint64_t seed) const override { return 0; } + const std::string& expr_name() const override { return _expr_name; } + +private: + static std::vector _build_hashes(const Block& block); + bool _equal(const Block& data_block, size_t data_row, size_t delete_row) const; + + std::string _expr_name; + Block _delete_block; + std::vector _field_ids; + std::vector _delete_hashes; + std::multimap _delete_hash_map; +}; + +} // namespace doris::format diff --git a/be/src/format_v2/file_reader.cpp b/be/src/format_v2/file_reader.cpp new file mode 100644 index 00000000000000..31b3f27c69797d --- /dev/null +++ b/be/src/format_v2/file_reader.cpp @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/file_reader.h" + +#include + +#include "format_v2/column_mapper.h" +#include "io/fs/buffered_reader.h" +#include "io/fs/tracing_file_reader.h" +#include "runtime/runtime_state.h" + +namespace doris::format { +namespace { + +std::unique_ptr clone_struct_predicate_target( + const std::unique_ptr& target) { + return target == nullptr ? nullptr : std::make_unique(*target); +} + +template +std::string join_debug_strings(const std::vector& values, Formatter formatter) { + std::ostringstream out; + out << "["; + for (size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + out << ", "; + } + out << formatter(values[i]); + } + out << "]"; + return out.str(); +} + +std::string int_vector_debug_string(const std::vector& values) { + std::ostringstream out; + out << "["; + for (size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + out << ", "; + } + out << values[i]; + } + out << "]"; + return out.str(); +} + +void append_struct_predicate_path(const FileStructPredicateTarget* target, + std::vector* path) { + DORIS_CHECK(path != nullptr); + for (const auto* current = target; current != nullptr; current = current->child.get()) { + path->push_back(current->file_local_id); + } +} + +std::string struct_predicate_target_debug_string(const FileStructPredicateTarget* target) { + if (target == nullptr) { + return "null"; + } + std::ostringstream out; + out << "{file_local_id=" << target->file_local_id + << ", file_child_name=" << target->file_child_name + << ", child=" << struct_predicate_target_debug_string(target->child.get()) << "}"; + return out.str(); +} + +bool struct_predicate_targets_equal(const FileStructPredicateTarget* lhs, + const FileStructPredicateTarget* rhs) { + while (lhs != nullptr && rhs != nullptr) { + if (lhs->file_local_id != rhs->file_local_id) { + return false; + } + lhs = lhs->child.get(); + rhs = rhs->child.get(); + } + return lhs == nullptr && rhs == nullptr; +} + +} // namespace + +FileStructPredicateTarget::FileStructPredicateTarget(const FileStructPredicateTarget& other) + : file_local_id(other.file_local_id), + file_child_name(other.file_child_name), + child(clone_struct_predicate_target(other.child)) {} + +FileStructPredicateTarget& FileStructPredicateTarget::operator=( + const FileStructPredicateTarget& other) { + if (this == &other) { + return *this; + } + file_local_id = other.file_local_id; + file_child_name = other.file_child_name; + child = clone_struct_predicate_target(other.child); + return *this; +} + +FileNestedPredicateTarget::FileNestedPredicateTarget(const FileNestedPredicateTarget& other) + : file_column_id(other.file_column_id), + struct_target(clone_struct_predicate_target(other.struct_target)) {} + +FileNestedPredicateTarget& FileNestedPredicateTarget::operator=( + const FileNestedPredicateTarget& other) { + if (this == &other) { + return *this; + } + file_column_id = other.file_column_id; + struct_target = clone_struct_predicate_target(other.struct_target); + return *this; +} + +LocalColumnId FileColumnPredicateFilter::effective_file_column_id() const { + return target.is_valid() ? target.file_column_id : file_column_id; +} + +std::vector FileColumnPredicateFilter::effective_file_child_id_path() const { + if (!target.is_valid()) { + return file_child_id_path; + } + std::vector path; + append_struct_predicate_path(target.struct_target.get(), &path); + return path; +} + +bool FileColumnPredicateFilter::same_target_as(const FileColumnPredicateFilter& other) const { + if (target.is_valid() && other.target.is_valid()) { + return target.file_column_id == other.target.file_column_id && + struct_predicate_targets_equal(target.struct_target.get(), + other.target.struct_target.get()); + } + return effective_file_column_id() == other.effective_file_column_id() && + effective_file_child_id_path() == other.effective_file_child_id_path(); +} + +std::string FileColumnPredicateFilter::debug_string() const { + std::ostringstream out; + out << "FileColumnPredicateFilter{target={file_column_id=" << effective_file_column_id() + << ", struct_target=" << struct_predicate_target_debug_string(target.struct_target.get()) + << "}, file_child_id_path=" << int_vector_debug_string(effective_file_child_id_path()) + << ", predicate_count=" << predicates.size() << "}"; + return out.str(); +} + +std::string FileScanRequest::debug_string() const { + std::ostringstream out; + out << "FileScanRequest{predicate_columns=" + << join_debug_strings( + predicate_columns, + [](const LocalColumnIndex& projection) { return projection.debug_string(); }) + << ", non_predicate_columns=" + << join_debug_strings( + non_predicate_columns, + [](const LocalColumnIndex& projection) { return projection.debug_string(); }) + << ", local_positions={"; + size_t position_idx = 0; + for (const auto& [column_id, block_position] : local_positions) { + if (position_idx++ > 0) { + out << ", "; + } + out << column_id << ":" << block_position; + } + out << "}, conjunct_count=" << conjuncts.size() + << ", delete_conjunct_count=" << delete_conjuncts.size() << ", column_predicate_filters=" + << join_debug_strings( + column_predicate_filters, + [](const FileColumnPredicateFilter& filter) { return filter.debug_string(); }) + << "}"; + return out.str(); +} + +Status FileReader::init(RuntimeState* state) { + _init_profile(); + SCOPED_RAW_TIMER(&_reader_statistics.file_reader_create_time); + ++_reader_statistics.open_file_num; + io::FileReaderOptions reader_options = + FileFactory::get_reader_options(state->query_options(), *_file_description); + _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader( + _profile, *_system_properties, *_file_description, reader_options, + io::DelegateReader::AccessMode::RANDOM, _io_ctx)); + // IOContext can be present without file_reader_stats in standalone tests or callers that only + // need extra IO state. TracingFileReader dereferences the stats pointer on every read, so only + // wrap the physical reader when stats collection is actually available. + _tracing_file_reader = _io_ctx && _io_ctx->file_reader_stats + ? std::make_shared( + _file_reader, _io_ctx->file_reader_stats) + : _file_reader; + _eof = false; + return Status::OK(); +} + +std::unique_ptr FileReader::create_column_mapper( + TableColumnMapperOptions options) const { + return std::make_unique(std::move(options)); +} + +} // namespace doris::format diff --git a/be/src/format_v2/file_reader.h b/be/src/format_v2/file_reader.h new file mode 100644 index 00000000000000..76b8bea60973ac --- /dev/null +++ b/be/src/format_v2/file_reader.h @@ -0,0 +1,400 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "core/field.h" +#include "exprs/vexpr_fwd.h" +#include "format_v2/column_data.h" +#include "gen_cpp/PlanNodes_types.h" +#include "io/file_factory.h" +#include "io/fs/file_reader_writer_fwd.h" + +namespace doris { +class Block; +class ColumnPredicate; +struct ConditionCacheContext; + +namespace io { +struct IOContext; +} // namespace io +} // namespace doris + +namespace doris::format { + +class TableColumnMapper; +struct TableColumnMapperOptions; + +// Struct-only nested predicate target used by file-layer pruning. +// This intentionally models only a STRUCT field chain. LIST/MAP/repeated predicates need explicit +// quantified semantics, so they must not be encoded here. +struct FileStructPredicateTarget { + int32_t file_local_id = -1; + std::string file_child_name; + std::unique_ptr child; + + FileStructPredicateTarget() = default; + FileStructPredicateTarget(int32_t local_id, std::string child_name, + std::unique_ptr nested_child = nullptr) + : file_local_id(local_id), + file_child_name(std::move(child_name)), + child(std::move(nested_child)) {} + FileStructPredicateTarget(const FileStructPredicateTarget& other); + FileStructPredicateTarget& operator=(const FileStructPredicateTarget& other); + FileStructPredicateTarget(FileStructPredicateTarget&& other) noexcept = default; + FileStructPredicateTarget& operator=(FileStructPredicateTarget&& other) noexcept = default; +}; + +struct FileNestedPredicateTarget { + LocalColumnId file_column_id = LocalColumnId::invalid(); + // Null means the predicate targets the top-level primitive column itself. + std::unique_ptr struct_target; + + FileNestedPredicateTarget() = default; + explicit FileNestedPredicateTarget(LocalColumnId column_id) : file_column_id(column_id) {} + FileNestedPredicateTarget(LocalColumnId column_id, + std::unique_ptr target) + : file_column_id(column_id), struct_target(std::move(target)) {} + FileNestedPredicateTarget(const FileNestedPredicateTarget& other); + FileNestedPredicateTarget& operator=(const FileNestedPredicateTarget& other); + FileNestedPredicateTarget(FileNestedPredicateTarget&& other) noexcept = default; + FileNestedPredicateTarget& operator=(FileNestedPredicateTarget&& other) noexcept = default; + + bool is_valid() const { return file_column_id.is_valid(); } +}; + +// File-local single-column predicates for file-layer pruning, such as min/max, page index, +// dictionary and bloom filter. +// Predicates must all belong to target.file_column_id. target.struct_target points to the nested +// primitive leaf under that root; null means the top-level column itself is the primitive leaf. +// These predicates are pruning hints only and are not row-level conjuncts. +struct FileColumnPredicateFilter { + FileNestedPredicateTarget target; + // Compatibility fields for call sites and tests that still construct pruning filters directly. + // New mapper code should fill target; file readers consume target first and only fall back to + // these fields while the API migration is in progress. + LocalColumnId file_column_id = LocalColumnId::invalid(); + std::vector file_child_id_path; + std::vector> predicates; + + LocalColumnId effective_file_column_id() const; + std::vector effective_file_child_id_path() const; + bool same_target_as(const FileColumnPredicateFilter& other) const; + std::string debug_string() const; +}; + +enum class FileFormat { + PARQUET, + ORC, + CSV, + JSON, + TEXT, + JNI, + NATIVE, + ARROW, +}; + +struct FileScanRequest { + virtual ~FileScanRequest() = default; + + std::string debug_string() const; + + // Columns that must be read before row-level filtering. They are materialized eagerly because + // conjuncts/delete_conjuncts need them to decide the selected rows. + std::vector predicate_columns; + // Columns read after row-level filtering. Predicate columns are also available for output and + // should not be duplicated here. + std::vector non_predicate_columns; + // file-local column id -> file-local output block position. + std::map local_positions; + // Row-level filters converted to file-local expressions from table-level predicates. + VExprContextSPtrs conjuncts; + // Delete predicates converted to file-local expressions. + VExprContextSPtrs delete_conjuncts; + // Single-column predicates used only for file-layer pruning, such as statistics, page index, + // dictionary and bloom filter. They must not be used for batch row-level filtering. + std::vector column_predicate_filters; +}; + +// Helper for constructing the scan-column layout in FileScanRequest. +// FileScanRequest keeps predicate and non-predicate columns separate because columnar readers such +// as Parquet can read predicate columns first, filter rows, and then lazily read the remaining +// projected columns. The two lists still share one file-local output block, whose positions are +// stored in local_positions. This builder centralizes the mechanical rules for that shared layout: +// - each root file column gets one stable block position; +// - predicate columns dominate non-predicate columns because they are already returned in the file +// block and can be reused for final materialization; +// - repeated nested projections for the same root are merged instead of duplicated. +// TableColumnMapper should still own table-to-file semantic resolution. This helper only owns the +// FileScanRequest layout contract after a file-local projection has been produced. +class FileScanRequestBuilder { +public: + explicit FileScanRequestBuilder(FileScanRequest* request) : _request(request) { + DORIS_CHECK(_request != nullptr); + } + + Status add_predicate_column(LocalColumnIndex projection) { + return _add_column(std::move(projection), &_request->predicate_columns, + /*is_predicate_column=*/true); + } + + Status add_non_predicate_column(LocalColumnIndex projection) { + return _add_column(std::move(projection), &_request->non_predicate_columns, + /*is_predicate_column=*/false); + } + + Status add_predicate_column(LocalColumnId column_id) { + return add_predicate_column(LocalColumnIndex::top_level(column_id)); + } + + Status add_non_predicate_column(LocalColumnId column_id) { + return add_non_predicate_column(LocalColumnIndex::top_level(column_id)); + } + +private: + static LocalIndex _next_block_position(const FileScanRequest& request) { + size_t next_position = 0; + for (const auto& [_, block_position] : request.local_positions) { + next_position = std::max(next_position, block_position.value() + 1); + } + return LocalIndex(next_position); + } + + static void _sort_projection_children_by_file_id(LocalColumnIndex* projection) { + DORIS_CHECK(projection != nullptr); + if (projection->project_all_children) { + return; + } + for (auto& child : projection->children) { + _sort_projection_children_by_file_id(&child); + } + std::ranges::sort(projection->children, + [](const LocalColumnIndex& lhs, const LocalColumnIndex& rhs) { + return lhs.local_id() < rhs.local_id(); + }); + } + + Status _add_column(LocalColumnIndex projection, std::vector* scan_columns, + bool is_predicate_column) { + DORIS_CHECK(scan_columns != nullptr); + const auto file_column_id = projection.column_id(); + DORIS_CHECK(file_column_id != LocalColumnId::invalid()); + if (!is_predicate_column && + std::ranges::find_if(_request->predicate_columns, [&](const LocalColumnIndex& p) { + return p.column_id() == file_column_id; + }) != _request->predicate_columns.end()) { + return Status::OK(); + } + if (!_request->local_positions.contains(file_column_id)) { + _request->local_positions.emplace(file_column_id, _next_block_position(*_request)); + } + + _sort_projection_children_by_file_id(&projection); + auto existing_projection_it = std::ranges::find_if( + *scan_columns, + [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; }); + if (existing_projection_it == scan_columns->end()) { + scan_columns->push_back(std::move(projection)); + } else { + RETURN_IF_ERROR(merge_local_column_index(&*existing_projection_it, projection)); + _sort_projection_children_by_file_id(&*existing_projection_it); + } + + if (is_predicate_column) { + auto it = std::ranges::find_if( + _request->non_predicate_columns, + [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; }); + if (it != _request->non_predicate_columns.end()) { + _request->non_predicate_columns.erase(it); + } + } + return Status::OK(); + } + + FileScanRequest* _request = nullptr; +}; + +struct FileAggregateRequest { + struct Column { + // File-local projection for the aggregate column. For nested MIN/MAX, this points to the + // single primitive leaf that can be represented by file statistics. For COUNT(col), this + // points to the top-level column whose NULL-ness should be counted. + LocalColumnIndex projection; + }; + + TPushAggOp::type agg_type = TPushAggOp::type::NONE; + // Empty for COUNT(*)/row-count pushdown. Non-empty for COUNT(col), where the file reader must + // return the number of non-NULL rows for the requested column instead of total rows. + std::vector columns; +}; + +struct FileAggregateResult { + struct Column { + // Mirrors FileAggregateRequest::Column::projection so TableReader can put the returned + // aggregate value back into the matching projected nested shape. + LocalColumnIndex projection; + bool has_min = false; + bool has_max = false; + Field min_value; + Field max_value; + }; + + int64_t count = 0; + std::vector columns; +}; + +/** + * +-----> get_schema() -----------------+ + * FileReader() -----> init() ----| -----> close() + * +-----> open() -----> get_block() ----+ + */ +class FileReader { +public: + struct ReaderStatistics { + int32_t filtered_row_groups = 0; + int32_t filtered_row_groups_by_min_max = 0; + int32_t filtered_row_groups_by_bloom_filter = 0; + int32_t read_row_groups = 0; + int64_t filtered_group_rows = 0; + int64_t filtered_page_rows = 0; + int64_t lazy_read_filtered_rows = 0; + int64_t read_rows = 0; + int64_t filtered_bytes = 0; + int64_t column_read_time = 0; + int64_t parse_meta_time = 0; + int64_t parse_footer_time = 0; + int64_t file_footer_read_calls = 0; + int64_t file_footer_hit_cache = 0; + int64_t file_reader_create_time = 0; + int64_t open_file_num = 0; + int64_t row_group_filter_time = 0; + int64_t page_index_filter_time = 0; + int64_t read_page_index_time = 0; + int64_t parse_page_index_time = 0; + int64_t predicate_filter_time = 0; + int64_t dict_filter_rewrite_time = 0; + int64_t bloom_filter_read_time = 0; + }; + + FileReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile) + : _system_properties(system_properties), + _file_description(std::move(file_description)), + _io_ctx(io_ctx), + _profile(profile) {} + virtual ~FileReader() = default; + + // Initialize file reader and parse file metadata. + virtual Status init(RuntimeState* state); + + // Set the maximum row count for the next physical read batch. Readers that do not batch by + // rows may ignore it. + virtual void set_batch_size(size_t batch_size) { (void)batch_size; } + + // Get semantic file-local schema from file metadata. The file schema is determined by file + // format and file content, and does not contain table/global schema semantics. A file reader may + // expose raw file identifiers, such as Parquet field_id, through ColumnDefinition::identifier, + // but it must not interpret table-format semantics such as Iceberg name mapping, + // default/generated columns, or partition columns. File-format physical wrappers should be + // normalized away before exposing this schema; for example, Parquet MAP is exposed as key/value + // children rather than key_value/entry. + // Doris plans external-table scan types as nullable, including all nested children of complex + // types. This protects Doris from illegal or inconsistent values produced by external systems. + // Therefore every ColumnDefinition::type returned here must be nullable. Complex types must + // also expose nullable child types recursively, even if the physical file marks those fields as + // required. + // This method can only be called after init() successfully, but does not require open() to be + // called. + virtual Status get_schema(std::vector* file_schema) const = 0; + + // Create the mapper that matches this reader's scan-request capabilities. TableReader still + // owns table-format semantics such as BY_NAME/BY_FIELD_ID/BY_INDEX, partition values and + // default expressions; the FileReader only chooses whether file-local requests support columnar + // lazy materialization/pruning or must materialize one flat list of required columns. + virtual std::unique_ptr create_column_mapper( + TableColumnMapperOptions options) const; + + // Open the file reader with file-local scan request. The file reader should initialize its internal state according to the request, but does not need to interpret table/global schema semantics. For example, all schema change, filter localization, default/generated/partition columns should be handled in table reader layer. This method can only be called after init() successfully. + virtual Status open(std::shared_ptr request) { + _request = std::move(request); + return Status::OK(); + } + + virtual Status get_block(Block* file_block, size_t* rows, bool* eof) { + if (rows != nullptr) { + *rows = 0; + } + if (eof != nullptr) { + *eof = true; + } + _eof = true; + return Status::OK(); + } + + virtual Status get_aggregate_result(const FileAggregateRequest& request, + FileAggregateResult* result) { + return Status::NotSupported("FileReader does not support aggregate pushdown"); + } + + // Condition cache is managed by TableReader and consumed by physical file readers. + // On cache HIT, readers may skip granules whose cached bit is false before doing column IO. + // On cache MISS, readers mark a granule true when row-level predicates keep at least one row + // in that granule. Readers that cannot map batch rows to stable file-global row ids should + // keep the default no-op implementation. + virtual void set_condition_cache_context(std::shared_ptr ctx) {} + + // Total rows covered by this physical reader. TableReader uses it to pre-size the miss bitmap. + // Readers should return 0 if the metadata is unavailable or the row coordinate is unstable. + virtual int64_t get_total_rows() const { return 0; } + + virtual Status close() { + _file_reader.reset(); + _tracing_file_reader.reset(); + _io_ctx.reset(); + _eof = true; + return Status::OK(); + } + +protected: + virtual void _init_profile() {} + + io::FileReaderSPtr _file_reader; + // _tracing_file_reader wraps _file_reader. + // _file_reader is original file reader. + // _tracing_file_reader is tracing file reader with io context. + // If io_ctx is null, _tracing_file_reader will be the same as file_reader. + io::FileReaderSPtr _tracing_file_reader = nullptr; + std::shared_ptr _request; + bool _eof = true; + ReaderStatistics _reader_statistics; + std::shared_ptr _system_properties; + std::unique_ptr _file_description; + std::shared_ptr _io_ctx; + RuntimeProfile* _profile = nullptr; +}; + +} // namespace doris::format diff --git a/be/src/format_v2/jni/hudi_jni_reader.cpp b/be/src/format_v2/jni/hudi_jni_reader.cpp new file mode 100644 index 00000000000000..3247e3c683c2de --- /dev/null +++ b/be/src/format_v2/jni/hudi_jni_reader.cpp @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/hudi_jni_reader.h" + +#include + +#include "core/block/block.h" +#include "exprs/vexpr_context.h" +#include "util/string_util.h" +#include "util/uid_util.h" + +namespace doris::format::hudi { +namespace { + +constexpr std::string_view HOODIE_CONF_PREFIX = "hoodie."; +constexpr std::string_view HADOOP_CONF_PREFIX = "hadoop_conf."; + +} // namespace + +Status HudiJniReader::validate_scan_range(const TFileRangeDesc& range) const { + if (!range.__isset.table_format_params) { + return Status::InternalError("missing table_format_params for hudi jni reader"); + } + if (!range.table_format_params.__isset.hudi_params) { + return Status::InternalError("missing hudi_params for hudi jni reader"); + } + const auto& hudi_params = range.table_format_params.hudi_params; + if (!hudi_params.__isset.base_path || hudi_params.base_path.empty()) { + return Status::InternalError( + "missing base_path for hudi jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + if (!hudi_params.__isset.data_file_path || hudi_params.data_file_path.empty()) { + return Status::InternalError( + "missing data_file_path for hudi jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + if (!hudi_params.__isset.data_file_length) { + return Status::InternalError( + "missing data_file_length for hudi jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + if (!hudi_params.__isset.column_names) { + return Status::InternalError( + "missing column_names for hudi jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + if (!hudi_params.__isset.column_types) { + return Status::InternalError( + "missing column_types for hudi jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + DORIS_CHECK(hudi_params.column_names.size() == hudi_params.column_types.size()); + if (_scan_params == nullptr) { + return Status::InternalError( + "missing scan params for hudi jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + return Status::OK(); +} + +std::string HudiJniReader::connector_class() const { + return "org/apache/doris/hudi/HadoopHudiJniScanner"; +} + +Status HudiJniReader::build_scanner_params(std::map* params) const { + DORIS_CHECK(params != nullptr); + DORIS_CHECK(_scan_params != nullptr); + params->clear(); + + const auto& hudi_params = _current_range.table_format_params.hudi_params; + (*params)["base_path"] = hudi_params.base_path; + (*params)["data_file_path"] = hudi_params.data_file_path; + (*params)["data_file_length"] = std::to_string(hudi_params.data_file_length); + (*params)["delta_file_paths"] = join(hudi_params.delta_logs, ","); + (*params)["hudi_column_names"] = join(hudi_params.column_names, ","); + (*params)["hudi_column_types"] = join(hudi_params.column_types, "#"); + (*params)["instant_time"] = hudi_params.instant_time; + (*params)["serde"] = hudi_params.serde; + (*params)["input_format"] = hudi_params.input_format; + if (_runtime_state != nullptr) { + (*params)["query_id"] = print_id(_runtime_state->query_id()); + } + + for (const auto& kv : _scan_params->properties) { + if (kv.first.starts_with(HOODIE_CONF_PREFIX)) { + (*params)[kv.first] = kv.second; + } else { + (*params)[std::string(HADOOP_CONF_PREFIX) + kv.first] = kv.second; + } + } + return Status::OK(); +} + +Status HudiJniReader::build_jni_columns( + std::vector* columns) const { + DORIS_CHECK(columns != nullptr); + columns->clear(); + columns->reserve(_projected_columns.size()); + for (size_t i = 0; i < _projected_columns.size(); ++i) { + const auto& table_column = _projected_columns[i]; + if (table_column.is_partition_key && + find_partition_value(table_column, _partition_values) != nullptr) { + continue; + } + columns->push_back({ + .java_name = table_column.name, + .output_index = i, + .output_type = table_column.type, + .transfer_type = table_column.type, + .replace_type = "not_replace", + }); + } + return Status::OK(); +} + +Status HudiJniReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) { + DORIS_CHECK(jni_block != nullptr); + DORIS_CHECK(output_block != nullptr); + DORIS_CHECK(rows != nullptr); + const auto original_rows = *rows; + + const auto& columns = jni_columns(); + DORIS_CHECK(columns.size() == jni_block->columns()); + for (size_t i = 0; i < columns.size(); ++i) { + const auto& column = columns[i]; + DORIS_CHECK(column.output_index < output_block->columns()); + output_block->get_by_position(column.output_index).type = column.output_type; + output_block->replace_by_position(column.output_index, + jni_block->get_by_position(i).column); + } + + for (size_t i = 0; i < _projected_columns.size(); ++i) { + const auto& table_column = _projected_columns[i]; + const auto* partition_value = find_partition_value(table_column, _partition_values); + if (!table_column.is_partition_key || partition_value == nullptr) { + continue; + } + output_block->get_by_position(i).type = table_column.type; + output_block->replace_by_position( + i, table_column.type->create_column_const(original_rows, *partition_value)); + } + DORIS_CHECK(output_block->rows() == original_rows); + if (!_conjuncts.empty()) { + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); + } + *rows = output_block->rows(); + return Status::OK(); +} + +} // namespace doris::format::hudi diff --git a/be/src/format_v2/jni/hudi_jni_reader.h b/be/src/format_v2/jni/hudi_jni_reader.h new file mode 100644 index 00000000000000..4beb6f2d1728b6 --- /dev/null +++ b/be/src/format_v2/jni/hudi_jni_reader.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "format_v2/jni/jni_table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::hudi { + +class HudiJniReader final : public format::JniTableReader { +public: + ~HudiJniReader() override = default; + +protected: + std::string connector_class() const override; + Status validate_scan_range(const TFileRangeDesc& range) const override; + Status build_scanner_params(std::map* params) const override; + Status build_jni_columns( + std::vector* columns) const override; + Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override; +}; + +} // namespace doris::format::hudi diff --git a/be/src/format_v2/jni/iceberg_sys_table_reader.cpp b/be/src/format_v2/jni/iceberg_sys_table_reader.cpp new file mode 100644 index 00000000000000..b41d505f886d31 --- /dev/null +++ b/be/src/format_v2/jni/iceberg_sys_table_reader.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/iceberg_sys_table_reader.h" + +#include + +#include "format/jni/jni_data_bridge.h" +#include "util/string_util.h" + +namespace doris::format::iceberg { +namespace { + +constexpr std::string_view HADOOP_OPTION_PREFIX = "hadoop."; + +} // namespace + +Status IcebergSysTableJniReader::validate_scan_range(const TFileRangeDesc& range) const { + if (!range.__isset.table_format_params) { + return Status::InternalError( + "missing table_format_params for iceberg sys table jni reader"); + } + if (!range.table_format_params.__isset.iceberg_params) { + return Status::InternalError("missing iceberg_params for iceberg sys table jni reader"); + } + if (!range.table_format_params.iceberg_params.__isset.serialized_split || + range.table_format_params.iceberg_params.serialized_split.empty()) { + return Status::InternalError( + "missing serialized_split for iceberg sys table jni reader, " + "possibly caused by FE/BE protocol mismatch"); + } + return Status::OK(); +} + +std::string IcebergSysTableJniReader::connector_class() const { + return "org/apache/doris/iceberg/IcebergSysTableJniScanner"; +} + +Status IcebergSysTableJniReader::build_scanner_params( + std::map* params) const { + DORIS_CHECK(params != nullptr); + params->clear(); + params->emplace("serialized_split", + _current_range.table_format_params.iceberg_params.serialized_split); + + std::vector required_types; + required_types.reserve(_projected_columns.size()); + for (const auto& column : _projected_columns) { + required_types.emplace_back(JniDataBridge::get_jni_type_with_different_string(column.type)); + } + (*params)["required_types"] = join(required_types, "#"); + + if (_scan_params != nullptr && _scan_params->__isset.properties && + !_scan_params->properties.empty()) { + for (const auto& kv : _scan_params->properties) { + (*params)[std::string(HADOOP_OPTION_PREFIX) + kv.first] = kv.second; + } + } + return Status::OK(); +} + +} // namespace doris::format::iceberg diff --git a/be/src/format_v2/jni/iceberg_sys_table_reader.h b/be/src/format_v2/jni/iceberg_sys_table_reader.h new file mode 100644 index 00000000000000..be254c39f3ffb5 --- /dev/null +++ b/be/src/format_v2/jni/iceberg_sys_table_reader.h @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "format_v2/jni/jni_table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::iceberg { + +class IcebergSysTableJniReader final : public format::JniTableReader { +public: + ~IcebergSysTableJniReader() override = default; + +protected: + std::string connector_class() const override; + Status validate_scan_range(const TFileRangeDesc& range) const override; + Status build_scanner_params(std::map* params) const override; +}; + +} // namespace doris::format::iceberg diff --git a/be/src/format_v2/jni/jdbc_reader.cpp b/be/src/format_v2/jni/jdbc_reader.cpp new file mode 100644 index 00000000000000..e0391f3a13a8f0 --- /dev/null +++ b/be/src/format_v2/jni/jdbc_reader.cpp @@ -0,0 +1,187 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/jdbc_reader.h" + +#include +#include + +#include "common/cast_set.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/block/columns_with_type_and_name.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_string.h" +#include "exprs/function/simple_function_factory.h" +#include "exprs/vexpr_context.h" +#include "format_v2/table_reader.h" +#include "util/jdbc_utils.h" + +namespace doris::format::jdbc { + +std::string JdbcJniReader::connector_class() const { + return "org/apache/doris/jdbc/JdbcJniScanner"; +} + +Status JdbcJniReader::prepare_split(const format::SplitReadOptions& options) { + _jdbc_params.clear(); + if (options.current_range.__isset.table_format_params && + options.current_range.table_format_params.table_format_type == "jdbc") { + _jdbc_params = std::map( + options.current_range.table_format_params.jdbc_params.begin(), + options.current_range.table_format_params.jdbc_params.end()); + } + return format::JniTableReader::prepare_split(options); +} + +// need pass to the java side, so the java scanner can parse the params and construct the JDBC connection +Status JdbcJniReader::build_scanner_params(std::map* params) const { + DORIS_CHECK(params != nullptr); + *params = _jdbc_params; + if (params->contains("jdbc_driver_url")) { + std::string resolved; + if (JdbcUtils::resolve_driver_url((*params)["jdbc_driver_url"], &resolved).ok()) { + (*params)["jdbc_driver_url"] = resolved; + } + } + return Status::OK(); +} + +Status JdbcJniReader::build_jni_columns( + std::vector* columns) const { + DORIS_CHECK(columns != nullptr); + columns->clear(); + columns->reserve(_projected_columns.size()); + for (size_t i = 0; i < _projected_columns.size(); ++i) { + const auto& table_column = _projected_columns[i]; + const auto primitive_type = remove_nullable(table_column.type)->get_primitive_type(); + columns->push_back({ + .java_name = table_column.name, + .output_index = i, + .output_type = table_column.type, + .transfer_type = _transfer_type_for(table_column.type), + .replace_type = _replace_type_for(primitive_type), + }); + } + return Status::OK(); +} + +Status JdbcJniReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) { + DORIS_CHECK(jni_block != nullptr); + DORIS_CHECK(output_block != nullptr); + DORIS_CHECK(rows != nullptr); + const auto original_rows = *rows; + const auto& columns = jni_columns(); + DORIS_CHECK(columns.size() == jni_block->columns()); + + for (size_t i = 0; i < columns.size(); ++i) { + const auto& column = columns[i]; + DORIS_CHECK(column.output_type != nullptr); + DORIS_CHECK(column.output_index < output_block->columns()); + if (_is_special_type(remove_nullable(column.output_type)->get_primitive_type())) { + RETURN_IF_ERROR(_cast_string_to_special_type(column, jni_block, i, output_block, + original_rows)); + continue; + } + output_block->get_by_position(column.output_index).type = column.output_type; + output_block->replace_by_position(column.output_index, + jni_block->get_by_position(i).column); + } + DORIS_CHECK(output_block->rows() == original_rows); + if (!_conjuncts.empty()) { + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); + } + *rows = output_block->rows(); + return Status::OK(); +} + +std::string JdbcJniReader::_replace_type_for(PrimitiveType type) const { + switch (type) { + case PrimitiveType::TYPE_BITMAP: + return "bitmap"; + case PrimitiveType::TYPE_HLL: + return "hll"; + case PrimitiveType::TYPE_QUANTILE_STATE: + return "quantile_state"; + case PrimitiveType::TYPE_JSONB: + return "jsonb"; + default: + return "not_replace"; + } +} + +bool JdbcJniReader::_is_special_type(PrimitiveType type) const { + return type == PrimitiveType::TYPE_BITMAP || type == PrimitiveType::TYPE_HLL || + type == PrimitiveType::TYPE_QUANTILE_STATE || type == PrimitiveType::TYPE_JSONB; +} + +DataTypePtr JdbcJniReader::_transfer_type_for(const DataTypePtr& output_type) const { + DORIS_CHECK(output_type != nullptr); + if (!_is_special_type(remove_nullable(output_type)->get_primitive_type())) { + return output_type; + } + DataTypePtr string_type = std::make_shared(); + if (output_type->is_nullable()) { + string_type = make_nullable(string_type); + } + return string_type; +} + +Status JdbcJniReader::_cast_string_to_special_type(const format::JniTableReader::JniColumn& column, + Block* jni_block, size_t jni_column_index, + Block* output_block, size_t rows) { + DORIS_CHECK(column.output_type != nullptr); + DORIS_CHECK(column.transfer_type != nullptr); + const auto target_type = column.output_type; + const auto target_type_name = target_type->get_name(); + + ColumnPtr input_column = jni_block->get_by_position(jni_column_index).column; + ColumnPtr cast_param = target_type->create_column_const_with_default_value(1); + + ColumnsWithTypeAndName argument_template; + argument_template.reserve(2); + argument_template.emplace_back(std::move(input_column), column.transfer_type, + "java.sql.String"); + argument_template.emplace_back(std::move(cast_param), target_type, target_type_name); + + FunctionBasePtr cast_function = SimpleFunctionFactory::instance().get_function( + "CAST", argument_template, make_nullable(target_type)); + if (cast_function == nullptr) { + return Status::InternalError("Failed to find CAST function for type {}", target_type_name); + } + + Block cast_block(argument_template); + const auto result_idx = cast_set(cast_block.columns()); + cast_block.insert({nullptr, make_nullable(target_type), "cast_result"}); + RETURN_IF_ERROR( + cast_function->execute(nullptr, cast_block, {0}, result_idx, cast_set(rows))); + + auto result_column = cast_block.get_by_position(result_idx).column; + output_block->get_by_position(column.output_index).type = target_type; + if (target_type->is_nullable()) { + output_block->replace_by_position(column.output_index, result_column); + } else { + const auto* nullable_column = assert_cast(result_column.get()); + output_block->replace_by_position(column.output_index, + nullable_column->get_nested_column_ptr()); + } + return Status::OK(); +} + +} // namespace doris::format::jdbc diff --git a/be/src/format_v2/jni/jdbc_reader.h b/be/src/format_v2/jni/jdbc_reader.h new file mode 100644 index 00000000000000..91a5878cb4622f --- /dev/null +++ b/be/src/format_v2/jni/jdbc_reader.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "core/types.h" +#include "format_v2/jni/jni_table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::jdbc { + +class JdbcJniReader final : public format::JniTableReader { +public: + ~JdbcJniReader() override = default; + + Status prepare_split(const format::SplitReadOptions& options) override; + +protected: + std::string connector_class() const override; + Status build_scanner_params(std::map* params) const override; + Status build_jni_columns( + std::vector* columns) const override; + Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override; + +private: + bool _is_special_type(PrimitiveType type) const; + std::string _replace_type_for(PrimitiveType type) const; + DataTypePtr _transfer_type_for(const DataTypePtr& output_type) const; + Status _cast_string_to_special_type(const format::JniTableReader::JniColumn& column, + Block* jni_block, size_t jni_column_index, + Block* output_block, size_t rows); + + std::map _jdbc_params; +}; + +} // namespace doris::format::jdbc diff --git a/be/src/format_v2/jni/jni_table_reader.cpp b/be/src/format_v2/jni/jni_table_reader.cpp new file mode 100644 index 00000000000000..d43a22e632b26b --- /dev/null +++ b/be/src/format_v2/jni/jni_table_reader.cpp @@ -0,0 +1,386 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/jni_table_reader.h" + +#include + +#include "common/cast_set.h" +#include "core/block/block.h" +#include "exprs/vexpr_context.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/string_util.h" + +namespace doris::format { + +Status JniTableReader::init(TableReadOptions&& options) { + RETURN_IF_ERROR(TableReader::init(std::move(options))); + _init_profile(); + + // JNI readers do not go through TableReader::open_reader(), where file-local filters are + // prepared for file readers. They execute table-level conjuncts directly on the JNI block. + RowDescriptor row_desc; + for (const auto& conjunct : _conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(conjunct->open(_runtime_state)); + } + return Status::OK(); +} + +Status JniTableReader::prepare_split(const SplitReadOptions& options) { + _current_range = options.current_range; + RETURN_IF_ERROR(validate_scan_range(options.current_range)); + RETURN_IF_ERROR(TableReader::prepare_split(options)); + DORIS_CHECK(!_closed); + DORIS_CHECK(!_scanner_opened); + if (_is_table_level_count_active()) { + return Status::OK(); + } + // Subclasses populate split-specific scanner params before calling this method, so the Java + // scanner can be opened here instead of being lazily opened by the first get_block() call. + return _open_jni_scanner(); +} + +Status JniTableReader::get_block(Block* output_block, bool* eos) { + DORIS_CHECK(output_block != nullptr); + DORIS_CHECK(eos != nullptr); + DORIS_CHECK(output_block->columns() == _projected_columns.size()); + output_block->clear_column_data(_projected_columns.size()); + if (_is_table_level_count_active()) { + return _read_table_level_count(output_block, eos); + } + + DORIS_CHECK(_scanner_opened); + if (_eof) { + *eos = true; + return Status::OK(); + } + + while (true) { + size_t current_rows = 0; + bool current_eof = false; + // get next block data from Java scanner, and fill the data to _jni_block_template + RETURN_IF_ERROR(_get_next_jni_block(¤t_rows, ¤t_eof)); + if (current_eof) { + _eof = true; + RETURN_IF_ERROR(_close_jni_scanner()); + *eos = true; + return Status::OK(); + } + + RETURN_IF_ERROR(finalize_jni_block(&_jni_block_template, output_block, ¤t_rows)); + if (current_rows == 0) { + output_block->clear_column_data(_projected_columns.size()); + continue; + } + *eos = false; + return Status::OK(); + } +} + +Status JniTableReader::_get_next_jni_block(size_t* rows, bool* eof) { + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(eof != nullptr); + *rows = 0; + _jni_block_template.clear_column_data(_jni_columns.size()); + + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + long meta_address = 0; + { + SCOPED_RAW_TIMER(&_java_scan_watcher); + //getNextBatchMeta function, return the meta address + RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_next_batch) + .call(&meta_address)); + } + RETURN_ERROR_IF_EXC(env); + if (meta_address == 0) { + *eof = true; + return Status::OK(); + } + + JniDataBridge::TableMetaAddress table_meta(meta_address); + const auto num_rows = table_meta.next_meta_as_long(); + if (num_rows == 0) { + *eof = true; + return Status::OK(); + } + + *rows = cast_set(num_rows); + // fill data from Java table meta to C++ block + RETURN_IF_ERROR(_fill_jni_block(table_meta, *rows)); + // call releaseTable() method in JAVA side to release the Java table Heap free Memory + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call()); + RETURN_ERROR_IF_EXC(env); + *eof = false; + return Status::OK(); +} + +// Java table to C++ block +Status JniTableReader::_fill_jni_block(JniDataBridge::TableMetaAddress& table_meta, + size_t num_rows) { + SCOPED_RAW_TIMER(&_fill_block_watcher); + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + for (size_t i = 0; i < _jni_columns.size(); ++i) { + const auto& read_column = _jni_columns[i]; + auto& column_with_type_and_name = _jni_block_template.get_by_position(i); + auto& column_ptr = column_with_type_and_name.column; + RETURN_IF_ERROR(JniDataBridge::fill_column(table_meta, column_ptr, + read_column.transfer_type, num_rows)); + // call releaseColumn(int columnIndex) method in JAVA side to release the Java column Heap free Memory + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_column) + .with_arg(cast_set(i)) + .call()); + RETURN_ERROR_IF_EXC(env); + } + return Status::OK(); +} + +Status JniTableReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) { + DORIS_CHECK(jni_block != nullptr); + DORIS_CHECK(output_block != nullptr); + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(jni_block->columns() == _jni_columns.size()); + const auto original_rows = *rows; + for (size_t i = 0; i < _jni_columns.size(); ++i) { + const auto& column = _jni_columns[i]; + DORIS_CHECK(column.output_index < output_block->columns()); + output_block->get_by_position(column.output_index).type = column.output_type; + output_block->replace_by_position(column.output_index, + jni_block->get_by_position(i).column); + } + DORIS_CHECK(output_block->rows() == original_rows); + // Apply conjuncts on the output block + if (!_conjuncts.empty()) { + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); + } + *rows = output_block->rows(); + return Status::OK(); +} + +Status JniTableReader::build_jni_columns(std::vector* columns) const { + DORIS_CHECK(columns != nullptr); + columns->clear(); + columns->reserve(_projected_columns.size()); + for (size_t i = 0; i < _projected_columns.size(); ++i) { + const auto& table_column = _projected_columns[i]; + columns->push_back({ + .java_name = table_column.name, + .output_index = i, + .output_type = table_column.type, + .transfer_type = table_column.type, + .replace_type = "not_replace", + }); + } + return Status::OK(); +} + +int64_t JniTableReader::self_split_weight() const { + return _current_range.__isset.self_split_weight ? _current_range.self_split_weight : -1; +} + +Status JniTableReader::close() { + if (_closed) { + return Status::OK(); + } + _closed = true; + RETURN_IF_ERROR(_close_jni_scanner()); + return TableReader::close(); +} + +Status JniTableReader::_close_jni_scanner() { + if (!_scanner_opened) { + JNIEnv* env = nullptr; + if (!_jni_scanner_obj.uninitialized()) { + RETURN_IF_ERROR(Jni::Env::Get(&env)); + } + _reset_split_state(env); + return Status::OK(); + } + + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + if (_scanner_profile != nullptr) { + COUNTER_UPDATE(_open_scanner_time, _jni_scanner_open_watcher); + COUNTER_UPDATE(_fill_block_time, _fill_block_watcher); + } + + RETURN_ERROR_IF_EXC(env); + jlong append_data_time = 0; + RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_append_data_time) + .call(&append_data_time)); + jlong create_vector_table_time = 0; + RETURN_IF_ERROR( + _jni_scanner_obj.call_long_method(env, _jni_scanner_get_create_vector_table_time) + .call(&create_vector_table_time)); + if (_scanner_profile != nullptr) { + COUNTER_UPDATE(_java_append_data_time, append_data_time); + COUNTER_UPDATE(_java_create_vector_table_time, create_vector_table_time); + COUNTER_UPDATE(_java_scan_time, + _java_scan_watcher - append_data_time - create_vector_table_time); + _max_time_split_weight_counter->conditional_update( + _jni_scanner_open_watcher + _fill_block_watcher + _java_scan_watcher, + self_split_weight()); + } + + // _fill_jni_block may fail before releasing the current Java table. JniScanner::releaseTable() + // is idempotent, so closing the split always releases it. + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call()); + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_close).call()); + _reset_split_state(env); + return Status::OK(); +} + +void JniTableReader::_reset_split_state(JNIEnv* env) { + if (!_jni_scanner_obj.uninitialized()) { + DORIS_CHECK(env != nullptr); + _jni_scanner_obj.reset(env); + } + _scanner_opened = false; + _eof = false; + _scanner_params.clear(); + _jni_columns.clear(); + _jni_block_template.clear(); + _jni_scanner_open_watcher = 0; + _java_scan_watcher = 0; + _fill_block_watcher = 0; +} + +Status JniTableReader::_open_jni_scanner() { + // subclasses build map _scanner_params to JAVA side + RETURN_IF_ERROR(build_scanner_params(&_scanner_params)); + // subclasses build _jni_columns info to JAVA side, including column name and column type + RETURN_IF_ERROR(build_jni_columns(&_jni_columns)); + // _jni_columns info is used to build Java scanner schema params and JNI block template. + _prepare_jni_scanner_schema(); + + if (_runtime_state != nullptr && _batch_size == 0) { + _batch_size = _runtime_state->batch_size(); + } + if (_runtime_state != nullptr) { + _scanner_params["time_zone"] = _runtime_state->timezone(); + } + + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + SCOPED_RAW_TIMER(&_jni_scanner_open_watcher); + RETURN_IF_ERROR(_register_jni_class_functions_once(env)); + RETURN_IF_ERROR(_create_jni_scanner_object(env, cast_set(_batch_size))); + // call open() method in JAVA side. + RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_open).call()); + RETURN_ERROR_IF_EXC(env); + + _scanner_opened = true; + return Status::OK(); +} + +void JniTableReader::_prepare_jni_scanner_schema() { + std::vector required_fields; + std::vector column_types; + std::vector replace_types; + required_fields.reserve(_jni_columns.size()); + column_types.reserve(_jni_columns.size()); + replace_types.reserve(_jni_columns.size()); + _jni_block_template.clear(); + _jni_block_template.reserve(_jni_columns.size()); + + bool has_replace_type = false; + for (const auto& column : _jni_columns) { + DORIS_CHECK(column.transfer_type != nullptr); + required_fields.push_back(column.java_name); + column_types.push_back( + JniDataBridge::get_jni_type_with_different_string(column.transfer_type)); + replace_types.push_back(column.replace_type); + has_replace_type = has_replace_type || column.replace_type != "not_replace"; + _jni_block_template.insert( + {column.transfer_type->create_column(), column.transfer_type, column.java_name}); + } + _scanner_params["required_fields"] = join(required_fields, ","); + _scanner_params["columns_types"] = join(column_types, "#"); + if (has_replace_type) { + _scanner_params["replace_string"] = join(replace_types, ","); + } +} + +Status JniTableReader::_register_jni_class_functions_once(JNIEnv* env) { + if (!_jni_scanner_cls.uninitialized()) { + return Status::OK(); + } + + RETURN_IF_ERROR( + Jni::Util::get_jni_scanner_class(env, connector_class().c_str(), &_jni_scanner_cls)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "", "(ILjava/util/Map;)V", + &_jni_scanner_constructor)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "open", "()V", &_jni_scanner_open)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getNextBatchMeta", "()J", + &_jni_scanner_get_next_batch)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getAppendDataTime", "()J", + &_jni_scanner_get_append_data_time)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getCreateVectorTableTime", "()J", + &_jni_scanner_get_create_vector_table_time)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "close", "()V", &_jni_scanner_close)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "releaseColumn", "(I)V", + &_jni_scanner_release_column)); + RETURN_IF_ERROR( + _jni_scanner_cls.get_method(env, "releaseTable", "()V", &_jni_scanner_release_table)); + RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getStatistics", "()Ljava/util/Map;", + &_jni_scanner_get_statistics)); + RETURN_IF_ERROR( + _jni_scanner_cls.get_method(env, "setBatchSize", "(I)V", &_jni_scanner_set_batch_size)); + return Status::OK(); +} + +Status JniTableReader::_create_jni_scanner_object(JNIEnv* env, int batch_size) { + DORIS_CHECK(!_jni_scanner_cls.uninitialized()); + DORIS_CHECK(!_jni_scanner_constructor.uninitialized()); + DORIS_CHECK(_jni_scanner_obj.uninitialized()); + Jni::LocalObject hashmap_object; + RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, _scanner_params, &hashmap_object)); + RETURN_IF_ERROR(_jni_scanner_cls.new_object(env, _jni_scanner_constructor) + .with_arg(batch_size) + .with_arg(hashmap_object) + .call(&_jni_scanner_obj)); + return Status::OK(); +} + +void JniTableReader::_init_profile() { + if (_scanner_profile == nullptr) { + return; + } + const auto connector_name = _connector_name(); + ADD_TIMER(_scanner_profile, connector_name); + _open_scanner_time = ADD_CHILD_TIMER(_scanner_profile, "OpenScannerTime", connector_name); + _java_scan_time = ADD_CHILD_TIMER(_scanner_profile, "JavaScanTime", connector_name); + _java_append_data_time = + ADD_CHILD_TIMER(_scanner_profile, "JavaAppendDataTime", connector_name); + _java_create_vector_table_time = + ADD_CHILD_TIMER(_scanner_profile, "JavaCreateVectorTableTime", connector_name); + _fill_block_time = ADD_CHILD_TIMER(_scanner_profile, "FillBlockTime", connector_name); + _max_time_split_weight_counter = _scanner_profile->add_conditition_counter( + "MaxTimeSplitWeight", TUnit::UNIT, [](int64_t _c, int64_t c) { return c > _c; }, + connector_name); +} + +std::string JniTableReader::_connector_name() const { + const auto parts = split(connector_class(), "/"); + return parts.empty() ? connector_class() : parts.back(); +} + +} // namespace doris::format diff --git a/be/src/format_v2/jni/jni_table_reader.h b/be/src/format_v2/jni/jni_table_reader.h new file mode 100644 index 00000000000000..1317661e5880c2 --- /dev/null +++ b/be/src/format_v2/jni/jni_table_reader.h @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "format/jni/jni_data_bridge.h" +#include "format_v2/table_reader.h" +#include "runtime/runtime_profile.h" +#include "util/jni-util.h" + +namespace doris::format { + +class JniTableReader : public TableReader { +public: + struct JniColumn { + std::string java_name; + // The index of the column in the output block, which is used to place the data from Java side to the correct position in the output block. + size_t output_index = 0; + // The original output type of the column, which is used for type casting after getting the data from Java side. like Bitmap column + // For columns without special types, the transfer_type and output_type are the same. + DataTypePtr output_type; + //Bitmap Type transfer type is String, so the Java scanner will convert the Bitmap column to String before transferring the data to C++, and then C++ side can convert the String back to Bitmap. + DataTypePtr transfer_type; + std::string replace_type = "not_replace"; + }; + + ~JniTableReader() override = default; + + Status init(TableReadOptions&& options) override; + Status prepare_split(const SplitReadOptions& options) override; + Status get_block(Block* block, bool* eos) override; + Status close() override; + +protected: + // Subclasses should implement these methods to specify the Java scanner class + virtual std::string connector_class() const = 0; + virtual Status validate_scan_range(const TFileRangeDesc&) const { return Status::OK(); } + // Subclasses should implement this method to build the scanner params map + virtual Status build_scanner_params(std::map* params) const = 0; + // Subclasses can override this method when Java transfer types differ from output types. + virtual Status build_jni_columns(std::vector* columns) const; + virtual Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows); + // used for profile + virtual int64_t self_split_weight() const; + const std::vector& jni_columns() const { return _jni_columns; } + TFileRangeDesc _current_range; + +private: + // init + void _init_profile(); + std::string _connector_name() const; + // open + Status _open_jni_scanner(); + void _reset_split_state(JNIEnv* env); + void _prepare_jni_scanner_schema(); + Status _register_jni_class_functions_once(JNIEnv* env); + Status _create_jni_scanner_object(JNIEnv* env, int batch_size); + // get_next + Status _get_next_jni_block(size_t* rows, bool* eof); + Status _fill_jni_block(JniDataBridge::TableMetaAddress& table_meta, size_t num_rows); + + Status _close_jni_scanner(); + + std::map _scanner_params; + std::vector _jni_columns; + Block _jni_block_template; + + bool _closed = false; + bool _scanner_opened = false; + bool _eof = false; + + RuntimeProfile::Counter* _open_scanner_time = nullptr; + RuntimeProfile::Counter* _java_scan_time = nullptr; + RuntimeProfile::Counter* _java_append_data_time = nullptr; + RuntimeProfile::Counter* _java_create_vector_table_time = nullptr; + RuntimeProfile::Counter* _fill_block_time = nullptr; + RuntimeProfile::ConditionCounter* _max_time_split_weight_counter = nullptr; + + int64_t _jni_scanner_open_watcher = 0; + int64_t _java_scan_watcher = 0; + int64_t _fill_block_watcher = 0; + + Jni::GlobalClass _jni_scanner_cls; + Jni::GlobalObject _jni_scanner_obj; + Jni::MethodId _jni_scanner_constructor; + Jni::MethodId _jni_scanner_open; + Jni::MethodId _jni_scanner_get_append_data_time; + Jni::MethodId _jni_scanner_get_create_vector_table_time; + Jni::MethodId _jni_scanner_get_next_batch; + Jni::MethodId _jni_scanner_close; + Jni::MethodId _jni_scanner_release_column; + Jni::MethodId _jni_scanner_release_table; + Jni::MethodId _jni_scanner_get_statistics; + Jni::MethodId _jni_scanner_set_batch_size; +}; + +} // namespace doris::format diff --git a/be/src/format_v2/jni/max_compute_jni_reader.cpp b/be/src/format_v2/jni/max_compute_jni_reader.cpp new file mode 100644 index 00000000000000..a26e9e229b5d82 --- /dev/null +++ b/be/src/format_v2/jni/max_compute_jni_reader.cpp @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/max_compute_jni_reader.h" + +#include "core/block/block.h" +#include "exprs/vexpr_context.h" + +namespace doris::format::max_compute { + +MaxComputeJniReader::MaxComputeJniReader(const doris::MaxComputeTableDescriptor* table_desc) + : _table_desc(table_desc) {} + +Status MaxComputeJniReader::validate_scan_range(const TFileRangeDesc& range) const { + if (!range.__isset.table_format_params) { + return Status::InternalError("missing table_format_params for max compute jni reader"); + } + if (!range.table_format_params.__isset.max_compute_params) { + return Status::InternalError("missing max_compute_params for max compute jni reader"); + } + const auto& max_compute_params = range.table_format_params.max_compute_params; + if (!max_compute_params.__isset.session_id || max_compute_params.session_id.empty()) { + return Status::InternalError( + "missing session_id for max compute jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + if (!max_compute_params.__isset.table_batch_read_session || + max_compute_params.table_batch_read_session.empty()) { + return Status::InternalError( + "missing table_batch_read_session for max compute jni reader, possibly caused " + "by FE/BE protocol mismatch"); + } + if (!range.__isset.start_offset) { + return Status::InternalError( + "missing start_offset for max compute jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + if (!range.__isset.size) { + return Status::InternalError( + "missing size for max compute jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + if (_scan_params == nullptr) { + return Status::InternalError( + "missing scan params for max compute jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + return Status::OK(); +} + +std::string MaxComputeJniReader::connector_class() const { + return "org/apache/doris/maxcompute/MaxComputeJniScanner"; +} + +Status MaxComputeJniReader::build_scanner_params(std::map* params) const { + DORIS_CHECK(params != nullptr); + DORIS_CHECK(_table_desc != nullptr); + params->clear(); + + *params = _table_desc->properties(); + (*params)["endpoint"] = _table_desc->endpoint(); + (*params)["quota"] = _table_desc->quota(); + (*params)["project"] = _table_desc->project(); + (*params)["table"] = _table_desc->table(); + + const auto& max_compute_params = _current_range.table_format_params.max_compute_params; + (*params)["session_id"] = max_compute_params.session_id; + (*params)["scan_serializer"] = max_compute_params.table_batch_read_session; + (*params)["start_offset"] = std::to_string(_current_range.start_offset); + (*params)["split_size"] = std::to_string(_current_range.size); + (*params)["connect_timeout"] = std::to_string(max_compute_params.connect_timeout); + (*params)["read_timeout"] = std::to_string(max_compute_params.read_timeout); + (*params)["retry_count"] = std::to_string(max_compute_params.retry_times); + return Status::OK(); +} + +Status MaxComputeJniReader::build_jni_columns( + std::vector* columns) const { + DORIS_CHECK(columns != nullptr); + columns->clear(); + columns->reserve(_projected_columns.size()); + for (size_t i = 0; i < _projected_columns.size(); ++i) { + const auto& table_column = _projected_columns[i]; + if (table_column.is_partition_key && + find_partition_value(table_column, _partition_values) != nullptr) { + continue; + } + columns->push_back({ + .java_name = table_column.name, + .output_index = i, + .output_type = table_column.type, + .transfer_type = table_column.type, + .replace_type = "not_replace", + }); + } + return Status::OK(); +} + +Status MaxComputeJniReader::finalize_jni_block(Block* jni_block, Block* output_block, + size_t* rows) { + DORIS_CHECK(jni_block != nullptr); + DORIS_CHECK(output_block != nullptr); + DORIS_CHECK(rows != nullptr); + const auto original_rows = *rows; + + const auto& columns = jni_columns(); + DORIS_CHECK(columns.size() == jni_block->columns()); + for (size_t i = 0; i < columns.size(); ++i) { + const auto& column = columns[i]; + DORIS_CHECK(column.output_index < output_block->columns()); + output_block->get_by_position(column.output_index).type = column.output_type; + output_block->replace_by_position(column.output_index, + jni_block->get_by_position(i).column); + } + + for (size_t i = 0; i < _projected_columns.size(); ++i) { + const auto& table_column = _projected_columns[i]; + const auto* partition_value = find_partition_value(table_column, _partition_values); + if (!table_column.is_partition_key || partition_value == nullptr) { + continue; + } + output_block->get_by_position(i).type = table_column.type; + output_block->replace_by_position( + i, table_column.type->create_column_const(original_rows, *partition_value)); + } + DORIS_CHECK(output_block->rows() == original_rows); + if (!_conjuncts.empty()) { + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, output_block, output_block->columns())); + } + *rows = output_block->rows(); + return Status::OK(); +} + +} // namespace doris::format::max_compute diff --git a/be/src/format_v2/jni/max_compute_jni_reader.h b/be/src/format_v2/jni/max_compute_jni_reader.h new file mode 100644 index 00000000000000..8addce07988e4c --- /dev/null +++ b/be/src/format_v2/jni/max_compute_jni_reader.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "format_v2/jni/jni_table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris { +class MaxComputeTableDescriptor; +} // namespace doris + +namespace doris::format::max_compute { + +class MaxComputeJniReader final : public format::JniTableReader { +public: + explicit MaxComputeJniReader(const doris::MaxComputeTableDescriptor* table_desc); + ~MaxComputeJniReader() override = default; + +protected: + std::string connector_class() const override; + Status validate_scan_range(const TFileRangeDesc& range) const override; + Status build_scanner_params(std::map* params) const override; + Status build_jni_columns( + std::vector* columns) const override; + Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override; + +private: + const doris::MaxComputeTableDescriptor* _table_desc = nullptr; +}; + +} // namespace doris::format::max_compute diff --git a/be/src/format_v2/jni/paimon_jni_reader.cpp b/be/src/format_v2/jni/paimon_jni_reader.cpp new file mode 100644 index 00000000000000..c68cc7b952a5d5 --- /dev/null +++ b/be/src/format_v2/jni/paimon_jni_reader.cpp @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/paimon_jni_reader.h" + +#include + +namespace doris::format::paimon { +namespace { + +constexpr std::string_view PAIMON_OPTION_PREFIX = "paimon."; +constexpr std::string_view HADOOP_OPTION_PREFIX = "hadoop."; + +} // namespace + +Status PaimonJniReader::validate_scan_range(const TFileRangeDesc& range) const { + if (!range.__isset.table_format_params) { + return Status::InternalError("missing table_format_params for paimon jni reader"); + } + if (!range.table_format_params.__isset.paimon_params) { + return Status::InternalError("missing paimon_params for paimon jni reader"); + } + if (!range.table_format_params.paimon_params.__isset.paimon_split || + range.table_format_params.paimon_params.paimon_split.empty()) { + return Status::InternalError( + "missing paimon_split for paimon jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + if (!range.table_format_params.paimon_params.__isset.reader_type || + range.table_format_params.paimon_params.reader_type != TPaimonReaderType::PAIMON_JNI) { + return Status::InternalError( + "invalid reader_type for paimon jni reader, possibly caused by FE/BE protocol " + "mismatch"); + } + if (_scan_params == nullptr || !_scan_params->__isset.serialized_table || + _scan_params->serialized_table.empty()) { + return Status::InternalError( + "missing serialized_table for paimon jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + if (!_scan_params->__isset.paimon_predicate || _scan_params->paimon_predicate.empty()) { + return Status::InternalError( + "missing paimon_predicate for paimon jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + return Status::OK(); +} + +std::string PaimonJniReader::connector_class() const { + return "org/apache/doris/paimon/PaimonJniScanner"; +} + +Status PaimonJniReader::build_scanner_params(std::map* params) const { + DORIS_CHECK(params != nullptr); + DORIS_CHECK(_scan_params != nullptr); + params->clear(); + + const auto& paimon_params = _current_range.table_format_params.paimon_params; + (*params)["paimon_split"] = paimon_params.paimon_split; + (*params)["paimon_predicate"] = _scan_params->paimon_predicate; + (*params)["serialized_table"] = _scan_params->serialized_table; + + if (_scan_params->__isset.paimon_options && !_scan_params->paimon_options.empty()) { + for (const auto& kv : _scan_params->paimon_options) { + (*params)[std::string(PAIMON_OPTION_PREFIX) + kv.first] = kv.second; + } + } + if (_scan_params->__isset.properties && !_scan_params->properties.empty()) { + for (const auto& kv : _scan_params->properties) { + (*params)[std::string(HADOOP_OPTION_PREFIX) + kv.first] = kv.second; + } + } + // TODO: Remove legacy split-level paimon_predicate, paimon_options and hadoop_conf from thrift + // after all readers stop using them. Format V2 Paimon JNI consumes the scan-level fields + // planned by current FE and intentionally does not fall back to deprecated split-level fields. + return Status::OK(); +} + +} // namespace doris::format::paimon diff --git a/be/src/format_v2/jni/paimon_jni_reader.h b/be/src/format_v2/jni/paimon_jni_reader.h new file mode 100644 index 00000000000000..f789edb0b17bd0 --- /dev/null +++ b/be/src/format_v2/jni/paimon_jni_reader.h @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "format_v2/jni/jni_table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::paimon { + +class PaimonJniReader final : public format::JniTableReader { +public: + ~PaimonJniReader() override = default; + +protected: + std::string connector_class() const override; + Status validate_scan_range(const TFileRangeDesc& range) const override; + Status build_scanner_params(std::map* params) const override; +}; + +} // namespace doris::format::paimon diff --git a/be/src/format_v2/jni/trino_connector_jni_reader.cpp b/be/src/format_v2/jni/trino_connector_jni_reader.cpp new file mode 100644 index 00000000000000..11c9945c5dea16 --- /dev/null +++ b/be/src/format_v2/jni/trino_connector_jni_reader.cpp @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/jni/trino_connector_jni_reader.h" + +#include + +#include "common/config.h" +#include "util/jni-util.h" + +namespace doris::format::trino_connector { +namespace { + +constexpr std::string_view TRINO_CONNECTOR_OPTION_PREFIX = "trino."; +constexpr std::string_view TRINO_CONNECTOR_NAME = "connector.name"; + +} // namespace + +Status TrinoConnectorJniReader::validate_scan_range(const TFileRangeDesc& range) const { + if (!range.__isset.table_format_params) { + return Status::InternalError("missing table_format_params for trino connector jni reader"); + } + if (!range.table_format_params.__isset.trino_connector_params) { + return Status::InternalError( + "missing trino_connector_params for trino connector jni reader"); + } + + const auto& trino_params = range.table_format_params.trino_connector_params; + if (!trino_params.__isset.catalog_name || trino_params.catalog_name.empty()) { + return Status::InternalError( + "missing catalog_name for trino connector jni reader, possibly caused by FE/BE " + "protocol mismatch"); + } + if (!trino_params.__isset.trino_connector_options || + !trino_params.trino_connector_options.contains(std::string(TRINO_CONNECTOR_NAME))) { + return Status::InternalError( + "missing trino connector.name option for trino connector jni reader, possibly " + "caused by FE/BE protocol mismatch"); + } + if (!trino_params.__isset.trino_connector_split || trino_params.trino_connector_split.empty()) { + return Status::InternalError( + "missing trino_connector_split for trino connector jni reader, possibly caused " + "by FE/BE protocol mismatch"); + } + if (!trino_params.__isset.trino_connector_table_handle || + trino_params.trino_connector_table_handle.empty()) { + return Status::InternalError( + "missing trino_connector_table_handle for trino connector jni reader, possibly " + "caused by FE/BE protocol mismatch"); + } + if (!trino_params.__isset.trino_connector_column_handles || + trino_params.trino_connector_column_handles.empty()) { + return Status::InternalError( + "missing trino_connector_column_handles for trino connector jni reader, possibly " + "caused by FE/BE protocol mismatch"); + } + if (!trino_params.__isset.trino_connector_column_metadata || + trino_params.trino_connector_column_metadata.empty()) { + return Status::InternalError( + "missing trino_connector_column_metadata for trino connector jni reader, possibly " + "caused by FE/BE protocol mismatch"); + } + if (!trino_params.__isset.trino_connector_trascation_handle || + trino_params.trino_connector_trascation_handle.empty()) { + return Status::InternalError( + "missing trino_connector_trascation_handle for trino connector jni reader, " + "possibly caused by FE/BE protocol mismatch"); + } + return Status::OK(); +} + +Status TrinoConnectorJniReader::prepare_split(const format::SplitReadOptions& options) { + RETURN_IF_ERROR(validate_scan_range(options.current_range)); + RETURN_IF_ERROR(_set_spi_plugins_dir()); + return format::JniTableReader::prepare_split(options); +} + +std::string TrinoConnectorJniReader::connector_class() const { + return "org/apache/doris/trinoconnector/TrinoConnectorJniScanner"; +} + +Status TrinoConnectorJniReader::build_scanner_params( + std::map* params) const { + DORIS_CHECK(params != nullptr); + params->clear(); + + const auto& trino_params = _current_range.table_format_params.trino_connector_params; + (*params)["catalog_name"] = trino_params.catalog_name; + (*params)["db_name"] = trino_params.db_name; + (*params)["table_name"] = trino_params.table_name; + (*params)["trino_connector_split"] = trino_params.trino_connector_split; + (*params)["trino_connector_table_handle"] = trino_params.trino_connector_table_handle; + (*params)["trino_connector_column_handles"] = trino_params.trino_connector_column_handles; + (*params)["trino_connector_column_metadata"] = trino_params.trino_connector_column_metadata; + (*params)["trino_connector_predicate"] = trino_params.trino_connector_predicate; + (*params)["trino_connector_trascation_handle"] = trino_params.trino_connector_trascation_handle; + + for (const auto& kv : trino_params.trino_connector_options) { + (*params)[std::string(TRINO_CONNECTOR_OPTION_PREFIX) + kv.first] = kv.second; + } + return Status::OK(); +} + +Status TrinoConnectorJniReader::_set_spi_plugins_dir() const { + JNIEnv* env = nullptr; + RETURN_IF_ERROR(Jni::Env::Get(&env)); + + Jni::LocalClass plugin_loader_cls; + const std::string plugin_loader_class = + "org/apache/doris/trinoconnector/TrinoConnectorPluginLoader"; + RETURN_IF_ERROR( + Jni::Util::get_jni_scanner_class(env, plugin_loader_class.c_str(), &plugin_loader_cls)); + + Jni::MethodId set_plugins_dir_method; + RETURN_IF_ERROR(plugin_loader_cls.get_static_method( + env, "setPluginsDir", "(Ljava/lang/String;)V", &set_plugins_dir_method)); + + Jni::LocalString trino_connector_plugin_path; + RETURN_IF_ERROR(Jni::LocalString::new_string( + env, doris::config::trino_connector_plugin_dir.c_str(), &trino_connector_plugin_path)); + + return plugin_loader_cls.call_static_void_method(env, set_plugins_dir_method) + .with_arg(trino_connector_plugin_path) + .call(); +} + +} // namespace doris::format::trino_connector diff --git a/be/src/format_v2/jni/trino_connector_jni_reader.h b/be/src/format_v2/jni/trino_connector_jni_reader.h new file mode 100644 index 00000000000000..a20c3a5f62ef96 --- /dev/null +++ b/be/src/format_v2/jni/trino_connector_jni_reader.h @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "common/status.h" +#include "format_v2/jni/jni_table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::trino_connector { + +class TrinoConnectorJniReader final : public format::JniTableReader { +public: + ~TrinoConnectorJniReader() override = default; + + Status prepare_split(const format::SplitReadOptions& options) override; + +protected: + std::string connector_class() const override; + Status validate_scan_range(const TFileRangeDesc& range) const override; + Status build_scanner_params(std::map* params) const override; + +private: + Status _set_spi_plugins_dir() const; +}; + +} // namespace doris::format::trino_connector diff --git a/be/src/format_v2/json/json_reader.cpp b/be/src/format_v2/json/json_reader.cpp new file mode 100644 index 00000000000000..f0219bb7d85345 --- /dev/null +++ b/be/src/format_v2/json/json_reader.cpp @@ -0,0 +1,1123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/json/json_reader.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_array.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "format/file_reader/new_plain_text_line_reader.h" +#include "format_v2/column_mapper.h" +#include "format_v2/materialized_reader_util.h" +#include "io/file_factory.h" +#include "io/fs/file_reader.h" +#include "io/fs/stream_load_pipe.h" +#include "io/fs/tracing_file_reader.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/decompressor.h" +#include "util/slice.h" + +namespace doris::format::json { +namespace { + +DataTypePtr json_file_type_from_slot_type(const DataTypePtr& type) { + if (type == nullptr) { + return nullptr; + } + + // Text-like file readers expose CHAR/VARCHAR as STRING and let the table column mapper cast to + // the destination slot type. JSON follows the same file-schema convention so that v2 mapping + // behaves consistently across text formats. + const bool is_nullable = type->is_nullable(); + const auto nested_type = remove_nullable(type); + DataTypePtr file_type; + switch (nested_type->get_primitive_type()) { + case TYPE_CHAR: + case TYPE_VARCHAR: + file_type = std::make_shared(); + break; + case TYPE_ARRAY: { + const auto* array_type = assert_cast(nested_type.get()); + file_type = std::make_shared( + json_file_type_from_slot_type(array_type->get_nested_type())); + break; + } + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + file_type = std::make_shared( + json_file_type_from_slot_type(map_type->get_key_type()), + json_file_type_from_slot_type(map_type->get_value_type())); + break; + } + case TYPE_STRUCT: { + const auto* struct_type = assert_cast(nested_type.get()); + DataTypes file_children; + file_children.reserve(struct_type->get_elements().size()); + for (const auto& child_type : struct_type->get_elements()) { + file_children.push_back(json_file_type_from_slot_type(child_type)); + } + file_type = + std::make_shared(file_children, struct_type->get_element_names()); + break; + } + default: + file_type = nested_type; + break; + } + + return is_nullable ? make_nullable(file_type) : file_type; +} + +ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id); + +std::vector synthesize_file_children_from_type(const DataTypePtr& type) { + std::vector children; + if (type == nullptr) { + return children; + } + const auto nested_type = remove_nullable(type); + switch (nested_type->get_primitive_type()) { + case TYPE_ARRAY: { + const auto* array_type = assert_cast(nested_type.get()); + children.push_back(synthetic_file_child("element", array_type->get_nested_type(), 0)); + break; + } + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + children.push_back(synthetic_file_child("key", map_type->get_key_type(), 0)); + children.push_back(synthetic_file_child("value", map_type->get_value_type(), 1)); + break; + } + case TYPE_STRUCT: { + const auto* struct_type = assert_cast(nested_type.get()); + children.reserve(struct_type->get_elements().size()); + for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) { + children.push_back(synthetic_file_child(struct_type->get_element_name(idx), + struct_type->get_element(idx), + cast_set(idx))); + } + break; + } + default: + break; + } + return children; +} + +ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id) { + ColumnDefinition child; + child.identifier = Field::create_field(name); + child.local_id = local_id; + child.name = name; + child.type = std::move(type); + child.children = synthesize_file_children_from_type(child.type); + return child; +} + +std::string lower_key(std::string_view key) { + std::string lowered(key.data(), key.size()); + std::transform(lowered.begin(), lowered.end(), lowered.begin(), ::tolower); + return lowered; +} + +} // namespace + +JsonReader::JsonReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, const TFileRangeDesc& range, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type, + std::optional stream_load_id) + : FileReader(system_properties, file_description, std::move(io_ctx), profile), + _scan_params(scan_params), + _range(range), + _source_file_slot_descs(file_slot_descs), + _range_compress_type(range_compress_type), + _stream_load_id(std::move(stream_load_id)) {} + +JsonReader::~JsonReader() { + static_cast(close()); +} + +Status JsonReader::init(RuntimeState* state) { + _runtime_state = state; + if (_scan_params == nullptr) { + return Status::InvalidArgument("JSON v2 reader requires scan params"); + } + if (_file_description == nullptr) { + return Status::InvalidArgument("JSON v2 reader requires file description"); + } + if (_runtime_state == nullptr) { + return Status::InvalidArgument("JSON v2 reader requires runtime state"); + } + if (!_scan_params->__isset.file_attributes) { + return Status::InvalidArgument("JSON v2 reader requires file attributes"); + } + + const auto& attributes = _scan_params->file_attributes; + if (attributes.__isset.text_params && attributes.text_params.__isset.line_delimiter) { + _line_delimiter = attributes.text_params.line_delimiter; + } else { + _line_delimiter = "\n"; + } + _line_delimiter_length = _line_delimiter.size(); + _jsonpaths = attributes.__isset.jsonpaths ? attributes.jsonpaths : ""; + _json_root = attributes.__isset.json_root ? attributes.json_root : ""; + _read_json_by_line = attributes.__isset.read_json_by_line && attributes.read_json_by_line; + _strip_outer_array = attributes.__isset.strip_outer_array && attributes.strip_outer_array; + _num_as_string = attributes.__isset.num_as_string && attributes.num_as_string; + _fuzzy_parse = attributes.__isset.fuzzy_parse && attributes.fuzzy_parse; + _openx_json_ignore_malformed = attributes.__isset.openx_json_ignore_malformed && + attributes.openx_json_ignore_malformed; + _is_hive_table = _range.table_format_params.table_format_type == "hive"; + _file_compress_type = _range_compress_type != TFileCompressType::UNKNOWN + ? _range_compress_type + : _scan_params->compress_type; + + _source_serdes = create_data_type_serdes(_source_file_slot_descs); + _file_schema.clear(); + _file_schema.reserve(_source_file_slot_descs.size()); + // JSON has no physical footer schema. The FE file slots are therefore the authoritative schema + // for both field names and source local ids. + for (size_t idx = 0; idx < _source_file_slot_descs.size(); ++idx) { + const auto* slot = _source_file_slot_descs[idx]; + DORIS_CHECK(slot != nullptr); + ColumnDefinition field; + field.identifier = Field::create_field(slot->col_name()); + field.local_id = cast_set(idx); + field.name = slot->col_name(); + field.type = json_file_type_from_slot_type(slot->get_data_type_ptr()); + field.children = synthesize_file_children_from_type(field.type); + _file_schema.push_back(std::move(field)); + } + _eof = false; + return Status::OK(); +} + +Status JsonReader::get_schema(std::vector* file_schema) const { + if (file_schema == nullptr) { + return Status::InvalidArgument("JSON v2 file_schema is null"); + } + *file_schema = _file_schema; + return Status::OK(); +} + +std::unique_ptr JsonReader::create_column_mapper( + TableColumnMapperOptions options) const { + return std::make_unique(std::move(options)); +} + +Status JsonReader::open(std::shared_ptr request) { + RETURN_IF_ERROR(FileReader::open(std::move(request))); + DORIS_CHECK(_request != nullptr); + RETURN_IF_ERROR(_build_requested_columns(*_request, &_requested_columns)); + _slot_name_to_index.clear(); + _slot_name_to_index.reserve(_requested_columns.size()); + for (size_t idx = 0; idx < _requested_columns.size(); ++idx) { + auto name = _requested_columns[idx].slot_desc->col_name(); + _slot_name_to_index.emplace(_is_hive_table ? lower_key(name) : name, idx); + } + _previous_positions.clear(); + _reader_range = _json_range(); + RETURN_IF_ERROR(_open_file_reader()); + RETURN_IF_ERROR(_create_decompressor()); + if (_read_json_by_line) { + RETURN_IF_ERROR(_create_line_reader()); + } + RETURN_IF_ERROR(_parse_jsonpath_and_json_root()); + _json_parser = std::make_unique(); + _padding_buffer.resize(_padded_size); + _reader_eof = false; + _single_document_read = false; + _eof = false; + return Status::OK(); +} + +Status JsonReader::get_block(Block* file_block, size_t* rows, bool* eof) { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(eof != nullptr); + if (_json_parser == nullptr || _physical_file_reader == nullptr) { + return Status::InternalError("JSON v2 reader is not open"); + } + + const auto batch_size = _runtime_state->batch_size(); + const auto max_block_bytes = _runtime_state->preferred_block_size_bytes(); + *rows = 0; + *eof = false; + + while (file_block->rows() < batch_size && !_reader_eof && + file_block->bytes() < max_block_bytes) { + if (_read_json_by_line && _skip_first_line) { + size_t skipped_size = 0; + const uint8_t* skipped_line = nullptr; + RETURN_IF_ERROR(_line_reader->read_line(&skipped_line, &skipped_size, &_reader_eof, + _io_ctx.get())); + _skip_first_line = false; + continue; + } + + const size_t original_rows = file_block->rows(); + size_t size = 0; + bool is_empty_row = false; + Status st = Status::OK(); + try { + st = _parse_next_json(&size, &_reader_eof); + if (st.ok() && !_reader_eof) { + if (size == 0) { + is_empty_row = true; + } else { + st = _extract_json_value(size, &_reader_eof, &is_empty_row); + } + } + if (st.ok() && !_reader_eof && !is_empty_row) { + st = _append_rows_from_current_value(file_block, &is_empty_row, &_reader_eof); + } + } catch (simdjson::simdjson_error& e) { + st = Status::DataQualityError("Parse json data failed. code: {}, error info: {}", + e.error(), e.what()); + } + if (!st.ok()) { + RETURN_IF_ERROR(_handle_json_error(st, file_block, original_rows, &is_empty_row)); + } + // An ignored or empty JSON object can produce no row. Avoid spinning forever on a document + // that was consumed but produced no materialized value. + if (!is_empty_row && file_block->rows() == original_rows) { + break; + } + } + + *rows = file_block->rows(); + RETURN_IF_ERROR(_apply_filters(file_block, rows)); + _reader_statistics.read_rows += *rows; + *eof = _reader_eof && *rows == 0; + _eof = *eof; + return Status::OK(); +} + +Status JsonReader::close() { + if (_line_reader != nullptr) { + _line_reader->close(); + _line_reader.reset(); + } + _json_parser.reset(); + _decompressor.reset(); + _physical_file_reader.reset(); + _tracing_file_reader.reset(); + _file_reader.reset(); + _requested_columns.clear(); + _slot_name_to_index.clear(); + _previous_positions.clear(); + _cached_string_values.clear(); + return Status::OK(); +} + +Status JsonReader::_build_requested_columns(const FileScanRequest& request, + std::vector* columns) const { + DORIS_CHECK(columns != nullptr); + columns->clear(); + // FileScanRequest stores a map from file-local id to output block position. Materialization is + // position-driven, so normalize it into a dense vector ordered by block position while keeping + // the original source index for jsonpaths. + std::vector by_position(request.local_positions.size()); + for (const auto& [file_column_id, block_position] : request.local_positions) { + if (file_column_id.value() < 0 || + static_cast(file_column_id.value()) >= _source_file_slot_descs.size()) { + return Status::InvalidArgument("JSON v2 request references unknown local column id {}", + file_column_id.value()); + } + if (block_position.value() >= by_position.size()) { + return Status::InvalidArgument("JSON v2 request has invalid block position {}", + block_position.value()); + } + const auto source_index = cast_set(file_column_id.value()); + RequestedColumn requested_column; + requested_column.file_column_id = file_column_id; + requested_column.block_position = block_position; + requested_column.source_index = source_index; + requested_column.slot_desc = _source_file_slot_descs[source_index]; + requested_column.serde = _source_serdes[source_index]; + by_position[block_position.value()] = std::move(requested_column); + } + for (size_t pos = 0; pos < by_position.size(); ++pos) { + if (!by_position[pos].file_column_id.is_valid()) { + return Status::InvalidArgument("JSON v2 request misses block position {}", pos); + } + } + *columns = std::move(by_position); + return Status::OK(); +} + +TFileRangeDesc JsonReader::_json_range() const { + auto range = _range; + range.__set_path(_file_description->path); + range.__set_start_offset(_file_description->range_start_offset); + range.__set_size(_file_description->range_size); + if (_file_description->file_size >= 0) { + range.__set_file_size(_file_description->file_size); + } + if (!_file_description->fs_name.empty()) { + range.__set_fs_name(_file_description->fs_name); + } + range.__set_file_cache_admission(_file_description->file_cache_admission); + if (_range_compress_type != TFileCompressType::UNKNOWN) { + range.__set_compress_type(_range_compress_type); + } + if (_stream_load_id.has_value()) { + range.__set_load_id(*_stream_load_id); + } + return range; +} + +Status JsonReader::_open_file_reader() { + _current_offset = _reader_range.start_offset; + if (_current_offset != 0) { + --_current_offset; + } + if (_scan_params->file_type == TFileType::FILE_STREAM) { + if (!_stream_load_id.has_value()) { + return Status::InvalidArgument("JSON v2 stream reader requires load id"); + } + RETURN_IF_ERROR(FileFactory::create_pipe_reader(*_stream_load_id, &_physical_file_reader, + _runtime_state, /*need_schema=*/false)); + } else { + _file_description->mtime = + _reader_range.__isset.modification_time ? _reader_range.modification_time : 0; + auto reader_options = FileFactory::get_reader_options(_runtime_state->query_options(), + *_file_description); + auto file_reader = DORIS_TRY(FileFactory::create_file_reader( + *_system_properties, *_file_description, reader_options, _profile)); + _physical_file_reader = + _io_ctx && _io_ctx->file_reader_stats + ? std::make_shared(std::move(file_reader), + _io_ctx->file_reader_stats) + : file_reader; + } + _file_reader = _physical_file_reader; + _tracing_file_reader = _physical_file_reader; + return Status::OK(); +} + +Status JsonReader::_create_decompressor() { + return Decompressor::create_decompressor(_file_compress_type, &_decompressor); +} + +Status JsonReader::_create_line_reader() { + int64_t size = _reader_range.size; + if (_reader_range.start_offset != 0) { + // Start one byte earlier and discard the first partial line, matching split semantics used + // by text readers. + ++size; + _skip_first_line = true; + } else { + _skip_first_line = false; + } + _line_reader = NewPlainTextLineReader::create_unique( + _profile, _physical_file_reader, _decompressor.get(), + std::make_shared(_line_delimiter, _line_delimiter_length, + false), + size, _current_offset); + return Status::OK(); +} + +Status JsonReader::_parse_jsonpath_and_json_root() { + _parsed_jsonpaths.clear(); + _parsed_json_root.clear(); + if (!_jsonpaths.empty()) { + rapidjson::Document jsonpaths_doc; + if (jsonpaths_doc.Parse(_jsonpaths.c_str(), _jsonpaths.length()).HasParseError() || + !jsonpaths_doc.IsArray()) { + return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths); + } + for (int i = 0; i < jsonpaths_doc.Size(); ++i) { + const rapidjson::Value& path = jsonpaths_doc[i]; + if (!path.IsString()) { + return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths); + } + std::string json_path = path.GetString(); + if (json_path.size() == 1 && json_path[0] == '$') { + json_path.insert(1, "."); + } + std::vector parsed_paths; + JsonFunctions::parse_json_paths(json_path, &parsed_paths); + _parsed_jsonpaths.push_back(std::move(parsed_paths)); + } + } + if (!_json_root.empty()) { + std::string json_root = _json_root; + if (json_root.size() == 1 && json_root[0] == '$') { + json_root.insert(1, "."); + } + JsonFunctions::parse_json_paths(json_root, &_parsed_json_root); + } + return Status::OK(); +} + +Status JsonReader::_read_one_document(size_t* size, bool* eof) { + DORIS_CHECK(size != nullptr); + DORIS_CHECK(eof != nullptr); + *size = 0; + *eof = false; + if (_line_reader != nullptr) { + const uint8_t* line = nullptr; + RETURN_IF_ERROR(_line_reader->read_line(&line, size, eof, _io_ctx.get())); + if (*eof) { + return Status::OK(); + } + _document_buffer.assign(reinterpret_cast(line), *size); + return Status::OK(); + } + // Non-line mode treats the split as one JSON document. This supports a single object or an + // array with strip_outer_array=true. + if (_single_document_read) { + *eof = true; + return Status::OK(); + } + _single_document_read = true; + if (_scan_params->file_type == TFileType::FILE_STREAM) { + return _read_one_document_from_pipe(size); + } + + auto read_size = _reader_range.size; + if (read_size <= 0 && _reader_range.__isset.file_size) { + read_size = _reader_range.file_size - _current_offset; + } + if (read_size <= 0) { + *eof = true; + return Status::OK(); + } + _document_buffer.resize(cast_set(read_size)); + Slice result(_document_buffer.data(), _document_buffer.size()); + RETURN_IF_ERROR(_physical_file_reader->read_at(_current_offset, result, size, _io_ctx.get())); + _document_buffer.resize(*size); + if (*size == 0) { + *eof = true; + } + return Status::OK(); +} + +Status JsonReader::_read_one_document_from_pipe(size_t* read_size) { + auto* stream_load_pipe = dynamic_cast(_physical_file_reader.get()); + if (stream_load_pipe == nullptr) { + return Status::InternalError("JSON v2 stream reader requires StreamLoadPipe"); + } + DorisUniqueBufferPtr file_buf; + RETURN_IF_ERROR(stream_load_pipe->read_one_message(&file_buf, read_size)); + _document_buffer.assign(reinterpret_cast(file_buf.get()), *read_size); + if (!stream_load_pipe->is_chunked_transfer()) { + return Status::OK(); + } + + while (true) { + DorisUniqueBufferPtr next_buf; + size_t next_size = 0; + RETURN_IF_ERROR(stream_load_pipe->read_one_message(&next_buf, &next_size)); + if (next_size == 0) { + break; + } + _document_buffer.append(reinterpret_cast(next_buf.get()), next_size); + *read_size += next_size; + } + return Status::OK(); +} + +Status JsonReader::_parse_next_json(size_t* size, bool* eof) { + RETURN_IF_ERROR(_read_one_document(size, eof)); + if (*eof || *size == 0) { + return Status::OK(); + } + if (*size >= 3 && static_cast(_document_buffer[0]) == 0xEF && + static_cast(_document_buffer[1]) == 0xBB && + static_cast(_document_buffer[2]) == 0xBF) { + _document_buffer.erase(0, 3); + *size -= 3; + } + if (*size + simdjson::SIMDJSON_PADDING > _padded_size) { + _padded_size = *size + simdjson::SIMDJSON_PADDING; + _padding_buffer.resize(_padded_size); + } + // Ondemand values reference the input buffer. Keep the padded bytes in a member buffer until the + // current document is fully materialized. + std::memcpy(_padding_buffer.data(), _document_buffer.data(), *size); + _original_doc_size = *size; + const auto error = + _json_parser->iterate(std::string_view(_padding_buffer.data(), *size), _padded_size) + .get(_original_json_doc); + if (error != simdjson::error_code::SUCCESS) { + return Status::DataQualityError( + "Parse json data for JsonDoc failed. code: {}, error info: {}", error, + simdjson::error_message(error)); + } + return Status::OK(); +} + +Status JsonReader::_extract_json_value(size_t size, bool* eof, bool* is_empty_row) { + DORIS_CHECK(eof != nullptr); + DORIS_CHECK(is_empty_row != nullptr); + *is_empty_row = false; + if (size == 0 || *eof) { + *is_empty_row = true; + return Status::OK(); + } + auto type_res = _original_json_doc.type(); + if (type_res.error() != simdjson::error_code::SUCCESS) { + return Status::DataQualityError( + "Parse json data for JsonDoc failed. code: {}, error info: {}", type_res.error(), + simdjson::error_message(type_res.error())); + } + const auto type = type_res.value(); + if (type != simdjson::ondemand::json_type::object && + type != simdjson::ondemand::json_type::array) { + return Status::DataQualityError("Not an json object or json array"); + } + _parsed_from_json_root = false; + if (!_parsed_json_root.empty() && type == simdjson::ondemand::json_type::object) { + // In object mode json_root can be applied once here. In outer-array mode each array element + // needs its own root extraction, which is handled while iterating the array. + simdjson::ondemand::object object = _original_json_doc; + Status st = JsonFunctions::extract_from_object(object, _parsed_json_root, &_json_value); + if (!st.ok()) { + return Status::DataQualityError("{}", st.to_string()); + } + _parsed_from_json_root = true; + } else { + _json_value = _original_json_doc; + } + + const auto value_type = _json_value.type().value(); + if (value_type == simdjson::ondemand::json_type::array && !_strip_outer_array) { + return Status::DataQualityError( + "JSON data is array-object, `strip_outer_array` must be TRUE."); + } + if (value_type != simdjson::ondemand::json_type::array && _strip_outer_array) { + return Status::DataQualityError( + "JSON data is not an array-object, `strip_outer_array` must be FALSE."); + } + if (!_parsed_jsonpaths.empty() && _strip_outer_array && + _json_value.count_elements().value() == 0) { + *is_empty_row = true; + } + return Status::OK(); +} + +Status JsonReader::_append_rows_from_current_value(Block* block, bool* is_empty_row, bool* eof) { + if (_parsed_jsonpaths.empty()) { + return _append_simple_json_rows(block, is_empty_row, eof); + } + if (_strip_outer_array) { + return _append_flat_array_jsonpath_rows(block, is_empty_row, eof); + } + return _append_nested_jsonpath_row(block, is_empty_row, eof); +} + +Status JsonReader::_append_simple_json_rows(Block* block, bool* is_empty_row, bool* eof) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(is_empty_row != nullptr); + DORIS_CHECK(eof != nullptr); + bool valid = false; + if (_json_value.type().value() == simdjson::ondemand::json_type::array) { + _array = _json_value.get_array(); + if (_array.count_elements() == 0) { + *is_empty_row = true; + return Status::OK(); + } + _array_iter = _array.begin(); + while (_array_iter != _array.end()) { + simdjson::ondemand::object object_value = (*_array_iter).get_object(); + RETURN_IF_ERROR(_set_column_values_from_object(&object_value, block, &valid)); + ++_array_iter; + if (!valid) { + *is_empty_row = true; + return Status::OK(); + } + } + } else { + simdjson::ondemand::object object_value = _json_value.get_object(); + RETURN_IF_ERROR(_set_column_values_from_object(&object_value, block, &valid)); + if (!valid) { + *is_empty_row = true; + return Status::OK(); + } + } + *is_empty_row = false; + return Status::OK(); +} + +Status JsonReader::_append_flat_array_jsonpath_rows(Block* block, bool* is_empty_row, bool* eof) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(is_empty_row != nullptr); + DORIS_CHECK(eof != nullptr); + const size_t original_rows = block->rows(); + bool valid = true; + _array = _json_value.get_array(); + _array_iter = _array.begin(); + while (_array_iter != _array.end()) { + simdjson::ondemand::object object_value = (*_array_iter).get_object(); + if (!_parsed_from_json_root && !_parsed_json_root.empty()) { + // For strip_outer_array, json_root is evaluated against each element. Elements without + // the requested root do not produce rows, matching the load reader behavior. + simdjson::ondemand::value rooted_value; + Status st = JsonFunctions::extract_from_object(object_value, _parsed_json_root, + &rooted_value); + if (!st.ok()) { + if (st.is()) { + ++_array_iter; + continue; + } + return st; + } + if (rooted_value.type().value() != simdjson::ondemand::json_type::object) { + ++_array_iter; + continue; + } + object_value = rooted_value.get_object(); + } + RETURN_IF_ERROR(_write_columns_by_jsonpath(&object_value, block, &valid)); + ++_array_iter; + } + *is_empty_row = block->rows() == original_rows; + return Status::OK(); +} + +Status JsonReader::_append_nested_jsonpath_row(Block* block, bool* is_empty_row, bool* eof) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(is_empty_row != nullptr); + DORIS_CHECK(eof != nullptr); + if (_json_value.type().value() != simdjson::ondemand::json_type::object) { + return Status::DataQualityError("Not object item"); + } + bool valid = true; + simdjson::ondemand::object object_value = _json_value.get_object(); + RETURN_IF_ERROR(_write_columns_by_jsonpath(&object_value, block, &valid)); + *is_empty_row = !valid; + return Status::OK(); +} + +Status JsonReader::_set_column_values_from_object(simdjson::ondemand::object* object_value, + Block* block, bool* valid) { + DORIS_CHECK(object_value != nullptr); + DORIS_CHECK(block != nullptr); + DORIS_CHECK(valid != nullptr); + std::vector seen_columns(block->columns(), false); + const size_t cur_row_count = block->rows(); + bool has_valid_value = false; + size_t key_index = 0; + + for (auto field : *object_value) { + std::string_view key = field.unescaped_key().value(); + const size_t column_index = _column_index(key, key_index++); + if (column_index == static_cast(-1)) { + continue; + } + if (seen_columns[column_index]) { + if (_is_hive_table) { + // Hive JSON keeps the last duplicate key ignoring case. The earlier value has + // already been appended, so remove it before writing the replacement. + _pop_back_last_inserted_value(block, column_index); + } else { + continue; + } + } + simdjson::ondemand::value value = field.value().value(); + const auto& requested = _requested_columns[column_index]; + auto* column_ptr = block->get_by_position(column_index).column->assert_mutable().get(); + RETURN_IF_ERROR(_write_data_to_column( + value, requested.slot_desc->get_data_type_ptr(), column_ptr, + requested.slot_desc->col_name(), requested.serde, valid)); + if (!*valid) { + return Status::OK(); + } + seen_columns[column_index] = true; + has_valid_value = true; + } + + for (size_t i = 0; i < _requested_columns.size(); ++i) { + if (seen_columns[i]) { + continue; + } + auto* column_ptr = block->get_by_position(i).column->assert_mutable().get(); + RETURN_IF_ERROR(_fill_missing_column(_requested_columns[i], column_ptr, valid)); + if (!*valid) { + _truncate_block_to_rows(block, cur_row_count); + return Status::OK(); + } + } + *valid = true; + if (!has_valid_value) { + return Status::OK(); + } + return Status::OK(); +} + +Status JsonReader::_write_columns_by_jsonpath(simdjson::ondemand::object* object_value, + Block* block, bool* valid) { + DORIS_CHECK(object_value != nullptr); + DORIS_CHECK(block != nullptr); + DORIS_CHECK(valid != nullptr); + bool has_valid_value = false; + const size_t cur_row_count = block->rows(); + _cached_string_values.clear(); + + for (size_t i = 0; i < _requested_columns.size(); ++i) { + const auto& requested = _requested_columns[i]; + auto* column_ptr = block->get_by_position(i).column->assert_mutable().get(); + simdjson::ondemand::value json_value; + Status st = Status::OK(); + if (requested.source_index < _parsed_jsonpaths.size()) { + st = JsonFunctions::extract_from_object( + *object_value, _parsed_jsonpaths[requested.source_index], &json_value); + if (!st.ok() && !st.is()) { + return st; + } + } + if (_is_root_path_for_column(requested)) { + // A root jsonpath means "materialize the whole current JSON document" instead of a + // field under it. Use the original bytes so callers receive the same document text. + if (is_column_nullable(*column_ptr)) { + auto* nullable_column = assert_cast(column_ptr); + nullable_column->get_null_map_data().push_back(0); + auto* column_string = + assert_cast(nullable_column->get_nested_column_ptr().get()); + column_string->insert_data(_padding_buffer.data(), _original_doc_size); + } else { + auto* column_string = assert_cast(column_ptr); + column_string->insert_data(_padding_buffer.data(), _original_doc_size); + } + has_valid_value = true; + } else if (requested.source_index >= _parsed_jsonpaths.size() || + st.is()) { + RETURN_IF_ERROR(_fill_missing_column(requested, column_ptr, valid)); + if (!*valid) { + _truncate_block_to_rows(block, cur_row_count); + return Status::OK(); + } + } else { + RETURN_IF_ERROR(_write_data_to_column( + json_value, requested.slot_desc->get_data_type_ptr(), column_ptr, + requested.slot_desc->col_name(), requested.serde, valid)); + if (!*valid) { + _truncate_block_to_rows(block, cur_row_count); + return Status::OK(); + } + has_valid_value = true; + } + } + + if (!has_valid_value) { + // jsonpaths can legally match nothing. Roll the row back so an all-missing path set does + // not create a synthetic row of nulls. + _truncate_block_to_rows(block, cur_row_count); + *valid = false; + return Status::OK(); + } + *valid = true; + return Status::OK(); +} + +template +Status JsonReader::_write_data_to_column(simdjson::ondemand::value& value, + const DataTypePtr& type_desc, IColumn* column_ptr, + const std::string& column_name, + const DataTypeSerDeSPtr& serde, bool* valid) { + ColumnNullable* nullable_column = nullptr; + IColumn* data_column_ptr = column_ptr; + DataTypeSerDeSPtr data_serde = serde; + const auto value_type = value.type().value(); + + if (is_column_nullable(*column_ptr)) { + nullable_column = assert_cast(column_ptr); + data_column_ptr = nullable_column->get_nested_column().get_ptr().get(); + if (type_desc->is_nullable()) { + data_serde = serde->get_nested_serdes()[0]; + } + if (value_type == simdjson::ondemand::json_type::null) { + nullable_column->insert_default(); + *valid = true; + return Status::OK(); + } + } else if (value_type == simdjson::ondemand::json_type::null) { + return Status::DataQualityError("Json value is null, but the column `{}` is not nullable.", + column_name); + } + + const auto primitive_type = type_desc->get_primitive_type(); + if (!is_complex_type(primitive_type)) { + if (value_type == simdjson::ondemand::json_type::string) { + std::string_view value_string; + if constexpr (use_string_cache) { + const auto cache_key = value.raw_json().value(); + if (_cached_string_values.contains(cache_key)) { + value_string = _cached_string_values[cache_key]; + } else { + value_string = value.get_string(); + _cached_string_values.emplace(cache_key, value_string); + } + } else { + value_string = value.get_string(); + } + Slice slice {value_string.data(), value_string.size()}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); + } else if (value_type == simdjson::ondemand::json_type::boolean) { + const char* str_value = value.get_bool() ? "1" : "0"; + Slice slice {str_value, 1}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); + } else { + std::string_view json_str = simdjson::to_json_string(value); + Slice slice {json_str.data(), json_str.size()}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); + } + } else if (primitive_type == TYPE_STRUCT) { + if (value_type != simdjson::ondemand::json_type::object) { + return Status::DataQualityError( + "Json value isn't object, but the column `{}` is struct.", column_name); + } + const auto* type_struct = + assert_cast(remove_nullable(type_desc).get()); + auto* struct_column_ptr = assert_cast(data_column_ptr); + const auto sub_serdes = data_serde->get_nested_serdes(); + std::map sub_col_name_to_idx; + for (size_t sub_col_idx = 0; sub_col_idx < type_struct->get_elements().size(); + ++sub_col_idx) { + sub_col_name_to_idx.emplace(lower_key(type_struct->get_element_name(sub_col_idx)), + sub_col_idx); + } + std::vector has_value(type_struct->get_elements().size(), false); + simdjson::ondemand::object struct_value = value.get_object(); + for (auto sub : struct_value) { + const auto sub_key = lower_key(sub.unescaped_key().value()); + const auto it = sub_col_name_to_idx.find(sub_key); + if (it == sub_col_name_to_idx.end()) { + continue; + } + const auto sub_column_idx = it->second; + auto sub_column_ptr = struct_column_ptr->get_column(sub_column_idx).get_ptr(); + if (has_value[sub_column_idx]) { + // Struct fields follow Hive-style duplicate handling: the last matching nested key + // wins. Remove the earlier nested value before appending the new one. + sub_column_ptr->pop_back(1); + } + has_value[sub_column_idx] = true; + auto sub_value = sub.value().value(); + RETURN_IF_ERROR(_write_data_to_column( + sub_value, type_struct->get_element(sub_column_idx), sub_column_ptr.get(), + column_name + "." + sub_key, sub_serdes[sub_column_idx], valid)); + } + for (size_t sub_col_idx = 0; sub_col_idx < type_struct->get_elements().size(); + ++sub_col_idx) { + if (has_value[sub_col_idx]) { + continue; + } + auto sub_column_ptr = struct_column_ptr->get_column(sub_col_idx).get_ptr(); + if (!is_column_nullable(*sub_column_ptr)) { + return Status::DataQualityError( + "Json file structColumn miss field {} and this column isn't nullable.", + column_name + "." + type_struct->get_element_name(sub_col_idx)); + } + sub_column_ptr->insert_default(); + } + } else if (primitive_type == TYPE_MAP) { + if (value_type != simdjson::ondemand::json_type::object) { + return Status::DataQualityError("Json value isn't object, but the column `{}` is map.", + column_name); + } + const auto* map_type = assert_cast(remove_nullable(type_desc).get()); + auto* map_column_ptr = assert_cast(data_column_ptr); + const auto sub_serdes = data_serde->get_nested_serdes(); + size_t field_count = 0; + simdjson::ondemand::object object_value = value.get_object(); + for (auto member_value : object_value) { + auto* key_column = map_column_ptr->get_keys_ptr()->assert_mutable()->get_ptr().get(); + auto key_serde = sub_serdes[0]; + if (is_column_nullable(*key_column)) { + auto* nullable_key = assert_cast(key_column); + nullable_key->get_null_map_data().push_back(0); + key_column = nullable_key->get_nested_column().get_ptr().get(); + if (map_type->get_key_type()->is_nullable()) { + key_serde = key_serde->get_nested_serdes()[0]; + } + } + std::string_view key_view = member_value.unescaped_key().value(); + Slice key_slice(key_view.data(), key_view.size()); + RETURN_IF_ERROR(key_serde->deserialize_one_cell_from_json(*key_column, key_slice, + _serde_options)); + simdjson::ondemand::value field_value = member_value.value().value(); + RETURN_IF_ERROR(_write_data_to_column( + field_value, map_type->get_value_type(), + map_column_ptr->get_values_ptr()->assert_mutable()->get_ptr().get(), + column_name + ".value", sub_serdes[1], valid)); + ++field_count; + } + auto& offsets = map_column_ptr->get_offsets(); + offsets.emplace_back(offsets.back() + field_count); + } else if (primitive_type == TYPE_ARRAY) { + if (value_type != simdjson::ondemand::json_type::array) { + return Status::DataQualityError("Json value isn't array, but the column `{}` is array.", + column_name); + } + const auto* array_type = + assert_cast(remove_nullable(type_desc).get()); + auto* array_column_ptr = assert_cast(data_column_ptr); + const auto sub_serdes = data_serde->get_nested_serdes(); + size_t field_count = 0; + simdjson::ondemand::array array_value = value.get_array(); + for (simdjson::ondemand::value sub_value : array_value) { + RETURN_IF_ERROR(_write_data_to_column( + sub_value, array_type->get_nested_type(), + array_column_ptr->get_data().get_ptr().get(), column_name + ".element", + sub_serdes[0], valid)); + ++field_count; + } + auto& offsets = array_column_ptr->get_offsets(); + offsets.emplace_back(offsets.back() + field_count); + } else { + return Status::InternalError("Not support JSON value to complex column"); + } + + if (nullable_column && value_type != simdjson::ondemand::json_type::null) { + nullable_column->get_null_map_data().push_back(0); + } + *valid = true; + return Status::OK(); +} + +Status JsonReader::_fill_missing_column(const RequestedColumn& column, IColumn* column_ptr, + bool* valid) { + if (column.slot_desc->is_nullable()) { + auto* nullable_column = assert_cast(column_ptr); + nullable_column->insert_default(); + *valid = true; + return Status::OK(); + } + return Status::DataQualityError( + "The column `{}` is not nullable, but it's not found in jsondata.", + column.slot_desc->col_name()); +} + +Status JsonReader::_append_null_for_malformed_json(Block* block) { + DORIS_CHECK(block != nullptr); + for (int i = 0; i < block->columns(); ++i) { + auto& column_with_type = block->get_by_position(i); + if (!is_column_nullable(*column_with_type.column)) { + return Status::DataQualityError("malformed json, but the column `{}` is not nullable.", + column_with_type.column->get_name()); + } + auto column = IColumn::mutate(std::move(column_with_type.column)); + assert_cast(column.get())->insert_default(); + column_with_type.column = std::move(column); + } + return Status::OK(); +} + +Status JsonReader::_handle_json_error(const Status& status, Block* block, size_t original_rows, + bool* is_empty_row) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(is_empty_row != nullptr); + // Deserialization can fail after several columns have already appended data. Always restore the + // block to the row count before this document before either surfacing the error or appending + // the ignore-malformed null row. + _truncate_block_to_rows(block, original_rows); + if (_openx_json_ignore_malformed && status.is()) { + RETURN_IF_ERROR(_append_null_for_malformed_json(block)); + *is_empty_row = false; + return Status::OK(); + } + return status; +} + +Status JsonReader::_apply_filters(Block* file_block, size_t* rows) { + return apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, rows); +} + +void JsonReader::_truncate_block_to_rows(Block* block, size_t num_rows) { + DORIS_CHECK(block != nullptr); + for (int i = 0; i < block->columns(); ++i) { + auto& column_with_type = block->get_by_position(i); + auto column = IColumn::mutate(std::move(column_with_type.column)); + if (column->size() > num_rows) { + column->pop_back(column->size() - num_rows); + } + column_with_type.column = std::move(column); + } +} + +void JsonReader::_pop_back_last_inserted_value(Block* block, size_t column_index) { + DORIS_CHECK(block != nullptr); + auto& column = block->get_by_position(column_index).column; + auto mutable_column = IColumn::mutate(std::move(column)); + mutable_column->pop_back(1); + column = std::move(mutable_column); +} + +size_t JsonReader::_column_index(std::string_view key, size_t key_index) { + std::string hive_key; + std::string_view lookup_key = key; + if (_is_hive_table) { + hive_key = lower_key(key); + lookup_key = hive_key; + } + if (key_index < _previous_positions.size()) { + // Most JSON lines share field order. Reuse the previous line's key-position mapping before + // falling back to the hash table lookup. + const auto previous = _previous_positions[key_index]; + if (previous < _requested_columns.size()) { + const auto previous_name = _requested_columns[previous].slot_desc->col_name(); + if ((_is_hive_table ? lower_key(previous_name) : previous_name) == lookup_key) { + return previous; + } + } + } + const auto it = _slot_name_to_index.find(std::string(lookup_key)); + if (it == _slot_name_to_index.end()) { + return static_cast(-1); + } + if (key_index >= _previous_positions.size()) { + _previous_positions.resize(key_index + 1, static_cast(-1)); + } + _previous_positions[key_index] = it->second; + return it->second; +} + +bool JsonReader::_is_root_path_for_column(const RequestedColumn& column) const { + return column.source_index < _parsed_jsonpaths.size() && + JsonFunctions::is_root_path(_parsed_jsonpaths[column.source_index]); +} + +} // namespace doris::format::json diff --git a/be/src/format_v2/json/json_reader.h b/be/src/format_v2/json/json_reader.h new file mode 100644 index 00000000000000..52cdfad6728d64 --- /dev/null +++ b/be/src/format_v2/json/json_reader.h @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include // IWYU pragma: keep + +#include +#include +#include +#include +#include +#include + +#include "core/custom_allocator.h" +#include "core/data_type_serde/data_type_serde.h" +#include "exprs/json_functions.h" +#include "format_v2/file_reader.h" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/runtime_profile.h" + +namespace doris { +class Decompressor; +class LineReader; +class SlotDescriptor; +class IColumn; +} // namespace doris + +namespace doris::format::json { + +// FileScannerV2 JSON reader. +// +// JSON files do not carry an embedded physical schema. The v2 table layer still needs a +// file-local schema and FileScanRequest contract, so this reader exposes FE-provided file slots as +// v2 file-local columns and performs JSON parsing/materialization directly in the v2 path. +class JsonReader final : public FileReader { +public: + // `file_slot_descs` is the FE-planned file schema. JSON has no physical schema, so the reader + // exposes these slots as synthetic file-local columns and materializes only the columns + // requested by FileScanRequest. + JsonReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileScanRangeParams* scan_params, const TFileRangeDesc& range, + const std::vector& file_slot_descs, + TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN, + std::optional stream_load_id = std::nullopt); + ~JsonReader() override; + + // Initializes scan attributes and builds the synthetic schema from FE slots. + Status init(RuntimeState* state) override; + Status get_schema(std::vector* file_schema) const override; + std::unique_ptr create_column_mapper( + TableColumnMapperOptions options) const override; + // Opens the underlying file or stream and binds requested local column ids to output block + // positions. After this call, `get_block` can be called until it returns eof. + Status open(std::shared_ptr request) override; + // Appends rows into `file_block` according to the FileScanRequest order. The block must already + // contain columns matching the requested positions. + Status get_block(Block* file_block, size_t* rows, bool* eof) override; + Status close() override; + +private: + // A requested column keeps both identities: + // - `source_index`: index in FE file slots, used for jsonpaths and SerDe lookup. + // - `block_position`: index in the caller's output block, used for materialization. + struct RequestedColumn { + LocalColumnId file_column_id = LocalColumnId::invalid(); + LocalIndex block_position; + size_t source_index = 0; + SlotDescriptor* slot_desc = nullptr; + DataTypeSerDeSPtr serde; + }; + + Status _build_requested_columns(const FileScanRequest& request, + std::vector* columns) const; + // Reconciles TableReader's split/range descriptor with FileReader's concrete file description. + TFileRangeDesc _json_range() const; + Status _open_file_reader(); + Status _create_decompressor(); + Status _create_line_reader(); + Status _parse_jsonpath_and_json_root(); + // Reads one logical JSON document: one line for JSON Lines, or the whole range/pipe payload for + // single-document mode. + Status _read_one_document(size_t* size, bool* eof); + Status _read_one_document_from_pipe(size_t* read_size); + // Moves the logical document into a simdjson-padded buffer and creates an ondemand document. + Status _parse_next_json(size_t* size, bool* eof); + // Applies json_root and validates the object/array shape required by strip_outer_array. + Status _extract_json_value(size_t size, bool* eof, bool* is_empty_row); + Status _append_rows_from_current_value(Block* block, bool* is_empty_row, bool* eof); + Status _append_simple_json_rows(Block* block, bool* is_empty_row, bool* eof); + Status _append_flat_array_jsonpath_rows(Block* block, bool* is_empty_row, bool* eof); + Status _append_nested_jsonpath_row(Block* block, bool* is_empty_row, bool* eof); + Status _set_column_values_from_object(simdjson::ondemand::object* object_value, Block* block, + bool* valid); + Status _write_columns_by_jsonpath(simdjson::ondemand::object* object_value, Block* block, + bool* valid); + template + Status _write_data_to_column(simdjson::ondemand::value& value, const DataTypePtr& type_desc, + IColumn* column_ptr, const std::string& column_name, + const DataTypeSerDeSPtr& serde, bool* valid); + Status _fill_missing_column(const RequestedColumn& column, IColumn* column_ptr, bool* valid); + Status _append_null_for_malformed_json(Block* block); + Status _handle_json_error(const Status& status, Block* block, size_t original_rows, + bool* is_empty_row); + Status _apply_filters(Block* file_block, size_t* rows); + void _truncate_block_to_rows(Block* block, size_t num_rows); + void _pop_back_last_inserted_value(Block* block, size_t column_index); + size_t _column_index(std::string_view key, size_t key_index); + bool _is_root_path_for_column(const RequestedColumn& column) const; + + const TFileScanRangeParams* _scan_params = nullptr; + TFileRangeDesc _range; + TFileRangeDesc _reader_range; + std::vector _source_file_slot_descs; + DataTypeSerDeSPtrs _source_serdes; + std::vector _file_schema; + RuntimeState* _runtime_state = nullptr; + TFileCompressType::type _range_compress_type = TFileCompressType::UNKNOWN; + std::optional _stream_load_id; + std::vector _requested_columns; + std::unordered_map _slot_name_to_index; + std::vector _previous_positions; + + io::FileReaderSPtr _physical_file_reader; + std::unique_ptr _decompressor; + std::unique_ptr _line_reader; + int64_t _current_offset = 0; + bool _reader_eof = false; + bool _skip_first_line = false; + bool _single_document_read = false; + + std::string _line_delimiter; + size_t _line_delimiter_length = 0; + std::string _jsonpaths; + std::string _json_root; + bool _read_json_by_line = false; + bool _strip_outer_array = false; + bool _num_as_string = false; + bool _fuzzy_parse = false; + bool _is_hive_table = false; + bool _openx_json_ignore_malformed = false; + TFileCompressType::type _file_compress_type = TFileCompressType::UNKNOWN; + + std::vector> _parsed_jsonpaths; + std::vector _parsed_json_root; + bool _parsed_from_json_root = false; + DataTypeSerDe::FormatOptions _serde_options; + + // simdjson ondemand values point into `_padding_buffer`, so the buffer must outlive all values + // created from the current document. + std::unique_ptr _json_parser; + simdjson::ondemand::document _original_json_doc; + simdjson::ondemand::value _json_value; + simdjson::ondemand::array _array; + simdjson::ondemand::array_iterator _array_iter; + std::string _document_buffer; + std::string _padding_buffer; + size_t _original_doc_size = 0; + size_t _padded_size = 1024 * 1024 * 8 + simdjson::SIMDJSON_PADDING; + std::unordered_map _cached_string_values; +}; + +} // namespace doris::format::json diff --git a/be/src/format_v2/materialized_reader_util.cpp b/be/src/format_v2/materialized_reader_util.cpp new file mode 100644 index 00000000000000..a7e533633510c4 --- /dev/null +++ b/be/src/format_v2/materialized_reader_util.cpp @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/materialized_reader_util.h" + +#include + +#include "core/block/block.h" +#include "core/data_type/data_type_nullable.h" +#include "exprs/vexpr_context.h" +#include "format_v2/file_reader.h" +#include "io/io_common.h" + +namespace doris::format { +namespace { + +void update_counter(RuntimeProfile::Counter* counter, int64_t value) { + if (counter != nullptr) { + COUNTER_UPDATE(counter, value); + } +} + +} // namespace + +ColumnPtr make_column_nullable_if_needed(ColumnPtr column, const DataTypePtr& target_type) { + if (target_type != nullptr && target_type->is_nullable() && column.get() != nullptr && + !column->is_nullable()) { + return make_nullable(std::move(column)); + } + return column; +} + +Status apply_materialized_reader_filters(const FileScanRequest* request, io::IOContext* io_ctx, + Block* file_block, size_t* rows, + const MaterializedReaderFilterProfile* profile) { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + const size_t rows_before_filter = *rows; + size_t rows_after_delete_filter = rows_before_filter; + if (request != nullptr && rows_before_filter > 0 && !request->delete_conjuncts.empty()) { + { + SCOPED_TIMER(profile == nullptr ? nullptr : profile->delete_conjunct_filter_time); + RETURN_IF_ERROR(VExprContext::filter_block(request->delete_conjuncts, file_block, + file_block->columns())); + } + rows_after_delete_filter = + file_block->columns() == 0 ? rows_before_filter : file_block->rows(); + if (profile != nullptr) { + update_counter(profile->rows_filtered_by_delete_conjunct, + rows_before_filter - rows_after_delete_filter); + } + } + + size_t rows_after_filter = rows_after_delete_filter; + if (request != nullptr && rows_after_delete_filter > 0 && !request->conjuncts.empty()) { + { + SCOPED_TIMER(profile == nullptr ? nullptr : profile->conjunct_filter_time); + RETURN_IF_ERROR(VExprContext::filter_block(request->conjuncts, file_block, + file_block->columns())); + } + rows_after_filter = + file_block->columns() == 0 ? rows_after_delete_filter : file_block->rows(); + const auto rows_filtered_by_conjunct = rows_after_delete_filter - rows_after_filter; + if (profile != nullptr) { + update_counter(profile->rows_filtered_by_conjunct, rows_filtered_by_conjunct); + } + if (io_ctx != nullptr) { + io_ctx->predicate_filtered_rows += rows_filtered_by_conjunct; + } + } + *rows = rows_after_filter; + return Status::OK(); +} + +} // namespace doris::format diff --git a/be/src/format_v2/materialized_reader_util.h b/be/src/format_v2/materialized_reader_util.h new file mode 100644 index 00000000000000..2fb1383dfb9569 --- /dev/null +++ b/be/src/format_v2/materialized_reader_util.h @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "core/column/column.h" +#include "core/data_type/data_type.h" +#include "runtime/runtime_profile.h" + +namespace doris { +class Block; + +namespace io { +struct IOContext; +} // namespace io + +namespace format { +struct FileScanRequest; + +// Shared helpers for FileReader implementations that deserialize or build already materialized +// Doris columns and then hand those columns to TableReader for final mapping. +ColumnPtr make_column_nullable_if_needed(ColumnPtr column, const DataTypePtr& target_type); + +// Optional profile counters for text-like readers. Native/JSON do not expose per-reader filter +// counters today, so they call apply_materialized_reader_filters() without this struct. +struct MaterializedReaderFilterProfile { + RuntimeProfile::Counter* delete_conjunct_filter_time = nullptr; + RuntimeProfile::Counter* conjunct_filter_time = nullptr; + RuntimeProfile::Counter* rows_filtered_by_delete_conjunct = nullptr; + RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr; +}; + +// Applies file-local filters in the same order used by FileScannerV2 readers: +// 1. delete_conjuncts remove rows that should not be visible to the scan output; +// 2. conjuncts apply ordinary file-local predicates. +// +// Only ordinary conjunct filtering contributes to IOContext::predicate_filtered_rows. This matches +// the previous JSON/Text/CSV behavior and keeps scanner accounting separate from delete filtering. +// When `profile` is provided, the helper also updates text-reader timer and row counters so CSV +// and Hive text keep their existing observability after sharing this implementation. +Status apply_materialized_reader_filters(const FileScanRequest* request, io::IOContext* io_ctx, + Block* file_block, size_t* rows, + const MaterializedReaderFilterProfile* profile = nullptr); + +} // namespace format +} // namespace doris diff --git a/be/src/format_v2/native/native_reader.cpp b/be/src/format_v2/native/native_reader.cpp new file mode 100644 index 00000000000000..2a0a89f80adc8d --- /dev/null +++ b/be/src/format_v2/native/native_reader.cpp @@ -0,0 +1,311 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/native/native_reader.h" + +#include +#include + +#include "common/cast_set.h" +#include "core/block/block.h" +#include "core/data_type/data_type_factory.hpp" +#include "core/data_type/data_type_nullable.h" +#include "format/native/native_format.h" +#include "format_v2/column_mapper.h" +#include "format_v2/materialized_reader_util.h" +#include "io/file_factory.h" +#include "io/fs/tracing_file_reader.h" +#include "runtime/runtime_state.h" +#include "util/slice.h" + +namespace doris::format::native { +namespace { + +Status parse_native_pblock(const std::string& buffer, const std::string& path, PBlock* pblock) { + DORIS_CHECK(pblock != nullptr); + if (!pblock->ParseFromArray(buffer.data(), cast_set(buffer.size()))) { + return Status::InternalError("Failed to parse native PBlock from file {}", path); + } + return Status::OK(); +} + +} // namespace + +NativeReader::NativeReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile) + : FileReader(system_properties, file_description, std::move(io_ctx), profile) {} + +NativeReader::~NativeReader() { + static_cast(close()); +} + +Status NativeReader::init(RuntimeState* state) { + _runtime_state = state; + if (_file_description == nullptr) { + return Status::InvalidArgument("Native v2 reader requires file description"); + } + RETURN_IF_ERROR(FileReader::init(state)); + RETURN_IF_ERROR(_validate_and_consume_header()); + return Status::OK(); +} + +Status NativeReader::get_schema(std::vector* file_schema) const { + if (file_schema == nullptr) { + return Status::InvalidArgument("Native v2 file_schema is null"); + } + RETURN_IF_ERROR(_ensure_schema_loaded()); + *file_schema = _file_schema; + return Status::OK(); +} + +std::unique_ptr NativeReader::create_column_mapper( + TableColumnMapperOptions options) const { + return std::make_unique(std::move(options)); +} + +Status NativeReader::open(std::shared_ptr request) { + RETURN_IF_ERROR(FileReader::open(std::move(request))); + DORIS_CHECK(_request != nullptr); + _first_block_consumed = false; + _reader_eof = false; + _eof = false; + return Status::OK(); +} + +Status NativeReader::get_block(Block* file_block, size_t* rows, bool* eof) { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(eof != nullptr); + if (_request == nullptr) { + return Status::InternalError("Native v2 reader is not open"); + } + + *rows = 0; + *eof = false; + if (_reader_eof) { + *eof = true; + _eof = true; + return Status::OK(); + } + + std::string buffer; + bool local_eof = false; + if (_first_block_loaded && !_first_block_consumed) { + buffer = _first_block_buffer; + } else { + RETURN_IF_ERROR(_read_next_pblock(&buffer, &local_eof)); + } + + if (local_eof && buffer.empty()) { + _reader_eof = true; + *eof = true; + _eof = true; + return Status::OK(); + } + if (buffer.empty()) { + return Status::InternalError("read empty native block from file {}", + _file_description->path); + } + + PBlock pblock; + RETURN_IF_ERROR(parse_native_pblock(buffer, _file_description->path, &pblock)); + if (!_schema_inited) { + RETURN_IF_ERROR(_init_schema_from_pblock(pblock)); + } + + Block source_block; + size_t uncompressed_bytes = 0; + int64_t decompress_time = 0; + RETURN_IF_ERROR(source_block.deserialize(pblock, &uncompressed_bytes, &decompress_time)); + RETURN_IF_ERROR(_materialize_requested_columns(source_block, file_block)); + *rows = file_block->rows(); + RETURN_IF_ERROR(_apply_filters(file_block, rows)); + _reader_statistics.read_rows += *rows; + + if (_first_block_loaded && !_first_block_consumed) { + _first_block_consumed = true; + } + if (_current_offset >= _file_size) { + _reader_eof = true; + } + *eof = _reader_eof && *rows == 0; + _eof = *eof; + return Status::OK(); +} + +Status NativeReader::close() { + _file_reader.reset(); + _tracing_file_reader.reset(); + _request.reset(); + _reader_eof = true; + _eof = true; + return Status::OK(); +} + +Status NativeReader::_validate_and_consume_header() { + DORIS_CHECK(_tracing_file_reader != nullptr); + _file_size = _tracing_file_reader->size(); + _current_offset = 0; + _reader_eof = (_file_size == 0); + + static constexpr size_t HEADER_SIZE = sizeof(DORIS_NATIVE_MAGIC) + sizeof(uint32_t); + if (_reader_eof || _file_size < cast_set(HEADER_SIZE)) { + return Status::InternalError( + "invalid Doris Native file {}, file size {} is smaller than header size {}", + _file_description->path, _file_size, HEADER_SIZE); + } + + char header[HEADER_SIZE]; + Slice header_slice(header, sizeof(header)); + size_t bytes_read = 0; + RETURN_IF_ERROR(_tracing_file_reader->read_at(0, header_slice, &bytes_read, _io_ctx.get())); + if (bytes_read != sizeof(header)) { + return Status::InternalError( + "failed to read Doris Native header from file {}, expect {} bytes, got {} bytes", + _file_description->path, sizeof(header), bytes_read); + } + if (std::memcmp(header, DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)) != 0) { + return Status::InternalError("invalid Doris Native magic header in file {}", + _file_description->path); + } + + uint32_t version = 0; + std::memcpy(&version, header + sizeof(DORIS_NATIVE_MAGIC), sizeof(uint32_t)); + if (version != DORIS_NATIVE_FORMAT_VERSION) { + return Status::InternalError( + "unsupported Doris Native format version {} in file {}, expect {}", version, + _file_description->path, DORIS_NATIVE_FORMAT_VERSION); + } + + _current_offset = sizeof(header); + _reader_eof = (_file_size == _current_offset); + return Status::OK(); +} + +Status NativeReader::_ensure_schema_loaded() const { + if (_schema_inited) { + return Status::OK(); + } + if (!_first_block_loaded) { + bool local_eof = false; + RETURN_IF_ERROR(_read_next_pblock(&_first_block_buffer, &local_eof)); + if (local_eof && _first_block_buffer.empty()) { + return Status::EndOfFile("empty native file {}", _file_description->path); + } + if (_first_block_buffer.empty()) { + return Status::InternalError("first native block is empty {}", _file_description->path); + } + _first_block_loaded = true; + } + + PBlock pblock; + RETURN_IF_ERROR(parse_native_pblock(_first_block_buffer, _file_description->path, &pblock)); + RETURN_IF_ERROR(_init_schema_from_pblock(pblock)); + return Status::OK(); +} + +Status NativeReader::_read_next_pblock(std::string* buffer, bool* eof) const { + DORIS_CHECK(buffer != nullptr); + DORIS_CHECK(eof != nullptr); + DORIS_CHECK(_tracing_file_reader != nullptr); + buffer->clear(); + *eof = false; + + if (_current_offset >= _file_size) { + *eof = true; + return Status::OK(); + } + + uint64_t block_len = 0; + Slice len_slice(reinterpret_cast(&block_len), sizeof(block_len)); + size_t bytes_read = 0; + RETURN_IF_ERROR( + _tracing_file_reader->read_at(_current_offset, len_slice, &bytes_read, _io_ctx.get())); + if (bytes_read == 0) { + *eof = true; + return Status::OK(); + } + if (bytes_read != sizeof(block_len)) { + return Status::InternalError( + "Failed to read native block length from file {}, expect {}, actual {}", + _file_description->path, sizeof(block_len), bytes_read); + } + _current_offset += sizeof(block_len); + if (block_len == 0) { + *eof = (_current_offset >= _file_size); + return Status::OK(); + } + + buffer->assign(block_len, '\0'); + Slice data_slice(buffer->data(), block_len); + bytes_read = 0; + RETURN_IF_ERROR( + _tracing_file_reader->read_at(_current_offset, data_slice, &bytes_read, _io_ctx.get())); + if (bytes_read != block_len) { + return Status::InternalError( + "Failed to read native block body from file {}, expect {}, actual {}", + _file_description->path, block_len, bytes_read); + } + _current_offset += block_len; + *eof = (_current_offset >= _file_size); + return Status::OK(); +} + +Status NativeReader::_init_schema_from_pblock(const PBlock& pblock) const { + _file_schema.clear(); + _file_schema.reserve(pblock.column_metas_size()); + for (int idx = 0; idx < pblock.column_metas_size(); ++idx) { + const auto& meta = pblock.column_metas(idx); + ColumnDefinition field; + field.identifier = Field::create_field(meta.name()); + field.local_id = idx; + field.name = meta.name(); + field.type = make_nullable(DataTypeFactory::instance().create_data_type(meta)); + _file_schema.push_back(std::move(field)); + } + _schema_inited = true; + return Status::OK(); +} + +Status NativeReader::_materialize_requested_columns(const Block& source_block, + Block* file_block) const { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(_request != nullptr); + for (const auto& [file_column_id, block_position] : _request->local_positions) { + const auto source_idx = file_column_id.value(); + if (source_idx < 0 || cast_set(source_idx) >= source_block.columns()) { + return Status::InternalError("native file {} does not contain local column id {}", + _file_description->path, source_idx); + } + if (block_position.value() >= file_block->columns()) { + return Status::InternalError("native v2 request has invalid block position {}", + block_position.value()); + } + const auto& target = file_block->get_by_position(block_position.value()); + auto column = source_block.get_by_position(source_idx).column; + column = make_column_nullable_if_needed(std::move(column), target.type); + file_block->replace_by_position(block_position.value(), IColumn::mutate(std::move(column))); + } + return Status::OK(); +} + +Status NativeReader::_apply_filters(Block* file_block, size_t* rows) const { + return apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, rows); +} + +} // namespace doris::format::native diff --git a/be/src/format_v2/native/native_reader.h b/be/src/format_v2/native/native_reader.h new file mode 100644 index 00000000000000..3719a6afd6c4f5 --- /dev/null +++ b/be/src/format_v2/native/native_reader.h @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include +#include + +#include "format_v2/file_reader.h" + +namespace doris::format::native { + +// FileScannerV2 reader for Doris Native files. +// +// Native files are self-describing only through the first serialized PBlock. TableReader asks for +// schema before open(), so this reader may read and cache that first PBlock during get_schema() and +// then replay it as the first data batch after open(). +class NativeReader final : public FileReader { +public: + NativeReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile); + ~NativeReader() override; + + Status init(RuntimeState* state) override; + Status get_schema(std::vector* file_schema) const override; + std::unique_ptr create_column_mapper( + TableColumnMapperOptions options) const override; + Status open(std::shared_ptr request) override; + Status get_block(Block* file_block, size_t* rows, bool* eof) override; + Status close() override; + +private: + Status _validate_and_consume_header(); + Status _ensure_schema_loaded() const; + Status _read_next_pblock(std::string* buffer, bool* eof) const; + Status _init_schema_from_pblock(const PBlock& pblock) const; + Status _materialize_requested_columns(const Block& source_block, Block* file_block) const; + Status _apply_filters(Block* file_block, size_t* rows) const; + + RuntimeState* _runtime_state = nullptr; + mutable int64_t _current_offset = 0; + mutable int64_t _file_size = 0; + mutable bool _reader_eof = true; + mutable bool _schema_inited = false; + mutable std::vector _file_schema; + mutable std::string _first_block_buffer; + mutable bool _first_block_loaded = false; + mutable bool _first_block_consumed = false; +}; + +} // namespace doris::format::native diff --git a/be/src/format_v2/parquet/parquet_column_schema.cpp b/be/src/format_v2/parquet/parquet_column_schema.cpp new file mode 100644 index 00000000000000..b42d47987a54cb --- /dev/null +++ b/be/src/format_v2/parquet/parquet_column_schema.cpp @@ -0,0 +1,492 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_column_schema.h" + +#include + +#include +#include +#include +#include + +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" +#include "format_v2/parquet/parquet_type.h" + +namespace doris::format::parquet { +namespace { + +struct SchemaBuildContext { + int32_t local_id = -1; // child ordinal in the parent node + int16_t definition_level = 0; // accumulated optional/repeated level count + int16_t repetition_level = 0; // accumulated repeated level count + int16_t nullable_definition_level = 0; // definition level of the nearest optional node + int16_t repeated_repetition_level = 0; // repetition level of the nearest repeated node + int16_t repeated_ancestor_definition_level = 0; // definition level of the nearest repeated node +}; + +enum class SchemaBuildMode { + // Normal recursive schema build. Bare repeated fields are exposed as Doris ARRAY for + // protobuf/legacy Parquet compatibility, while repeated LIST/MAP annotated groups are rejected + // because Parquet LIST/MAP outer groups are not allowed to be repeated at a top-level or struct + // field boundary. + NORMAL, + // Build the current repeated node as the already-selected element of an enclosing LIST. This + // is the compatibility path for Arrow/parquet-format legacy two-level LIST encodings where the + // repeated node itself is the array element instead of a wrapper that should be stripped. + REPEATED_NODE_AS_LIST_ELEMENT, + // Build the current repeated group as a STRUCT element of an enclosing LIST, ignoring LIST/MAP + // annotations on the repeated group itself. This keeps compatibility with the old Doris + // Parquet schema parser for Hive/legacy wrappers named "array" or "_tuple". + REPEATED_NODE_AS_STRUCT_ELEMENT, +}; + +// Result of applying Parquet LIST backward compatibility rules to the single repeated child of a +// LIST-annotated group. The repeated child can either be a physical wrapper whose only child is the +// element, or the element node itself. +struct ListElementResolution { + // Parquet node that should be exposed as Doris ARRAY element. + const ::parquet::schema::Node* element_node = nullptr; + // Level state after consuming the LIST repeated child. The parent ARRAY schema keeps this state + // to materialize offsets, empty arrays and null arrays. + SchemaBuildContext repeated_context; + // Level state used to build element_node. This equals repeated_context when the repeated child + // itself is the element, and includes the wrapper's only child when standard 3-level LIST + // encoding is stripped. + SchemaBuildContext element_context; + // Build mode for element_node. Non-NORMAL modes mean element_node is the repeated child itself, + // and the repeated level must not be interpreted as a second unrelated array at the same + // boundary. + SchemaBuildMode element_build_mode = SchemaBuildMode::NORMAL; +}; + +// Resolved repeated entry group of a MAP-annotated group. The entry wrapper is a physical Parquet +// encoding detail; Doris folds it into the parent MAP schema and exposes only direct [key, value] +// children. +struct MapEntryResolution { + const ::parquet::schema::GroupNode* entry_group = nullptr; + // Level state after consuming the repeated entry group. The parent MAP schema keeps this state + // to materialize offsets, empty maps and null maps. + SchemaBuildContext entry_context; +}; + +bool is_list_node(const ::parquet::schema::Node& node) { + const auto& logical_type = node.logical_type(); + return node.converted_type() == ::parquet::ConvertedType::LIST || + (logical_type != nullptr && logical_type->is_valid() && logical_type->is_list()); +} + +bool is_map_node(const ::parquet::schema::Node& node) { + const auto& logical_type = node.logical_type(); + return node.converted_type() == ::parquet::ConvertedType::MAP || + node.converted_type() == ::parquet::ConvertedType::MAP_KEY_VALUE || + (logical_type != nullptr && logical_type->is_valid() && logical_type->is_map()); +} + +bool has_logical_annotation(const ::parquet::schema::Node& node) { + const auto& logical_type = node.logical_type(); + return (node.converted_type() != ::parquet::ConvertedType::NONE && + node.converted_type() != ::parquet::ConvertedType::UNDEFINED) || + (logical_type != nullptr && logical_type->is_valid() && !logical_type->is_none()); +} + +bool has_structural_list_name(const std::string& list_name, const std::string& repeated_name) { + return repeated_name == "array" || repeated_name == list_name + "_tuple"; +} + +bool should_build_repeated_field_as_list(const ::parquet::schema::Node& node) { + return node.is_repeated() && !is_list_node(node) && !is_map_node(node); +} + +DataTypePtr nullable_if_needed(DataTypePtr type, const ::parquet::schema::Node& node) { + return node.is_optional() ? make_nullable(type) : type; +} + +void inherit_common_schema_state(const ::parquet::schema::Node& node, + const SchemaBuildContext& context, + ParquetColumnSchema* column_schema) { + DORIS_CHECK(column_schema != nullptr); + column_schema->local_id = context.local_id; + column_schema->parquet_field_id = node.field_id(); + column_schema->name = node.name(); + column_schema->max_definition_level = context.definition_level; + column_schema->max_repetition_level = context.repetition_level; + column_schema->nullable_definition_level = context.nullable_definition_level; + column_schema->definition_level = context.definition_level; + column_schema->repetition_level = context.repetition_level; + column_schema->repeated_ancestor_definition_level = context.repeated_ancestor_definition_level; + column_schema->repeated_repetition_level = context.repeated_repetition_level; +} + +SchemaBuildContext child_context(const SchemaBuildContext& parent, + const ::parquet::schema::Node& child_node, int32_t child_idx) { + SchemaBuildContext result = parent; + result.local_id = child_idx; + if (child_node.repetition() == ::parquet::Repetition::OPTIONAL) { + result.definition_level++; + result.nullable_definition_level = result.definition_level; + } + if (child_node.is_repeated()) { + result.repetition_level++; + result.definition_level++; + result.repeated_repetition_level = result.repetition_level; + result.repeated_ancestor_definition_level = result.definition_level; + } + return result; +} + +void propagate_child_levels(ParquetColumnSchema* column_schema) { + DORIS_CHECK(column_schema != nullptr); + for (const auto& child : column_schema->children) { + column_schema->max_definition_level = + std::max(column_schema->max_definition_level, child->max_definition_level); + column_schema->max_repetition_level = + std::max(column_schema->max_repetition_level, child->max_repetition_level); + } +} + +// Mirrors Arrow's ResolveList() compatibility rules, but only decides which Parquet node is the +// logical LIST element. The caller still builds Doris' semantic LIST->[element] schema tree. +// Important cases: +// - repeated primitive: the primitive itself is the element (legacy two-level LIST). +// - repeated group with multiple children: the group itself is a STRUCT element. +// - repeated group named "array" or "_tuple": the group itself is a STRUCT element per +// Parquet backward compatibility rules, even when it has one child or its own logical annotation. +// This also keeps v2 file-local schema aligned with Doris' old schema parser used by HDFS TVF. +// - other repeated group with a logical annotation, or whose only child is repeated: the group +// itself is the element. This preserves nested LIST/MAP and repeated fields inside struct +// elements. +// - otherwise, strip the one-child repeated wrapper as standard three-level LIST encoding. +Status resolve_list_element_node(const ::parquet::schema::GroupNode& list_group, + const SchemaBuildContext& list_context, + ListElementResolution* result) { + if (result == nullptr) { + return Status::InvalidArgument("result is null"); + } + if (list_group.field_count() != 1) { + return Status::NotSupported("Unsupported parquet LIST encoding for column {}", + list_group.name()); + } + const auto& repeated_node = *list_group.field(0); + if (!repeated_node.is_repeated()) { + return Status::NotSupported("Unsupported parquet LIST encoding for column {}", + list_group.name()); + } + result->repeated_context = child_context(list_context, repeated_node, 0); + if (repeated_node.is_primitive()) { + result->element_node = &repeated_node; + result->element_context = result->repeated_context; + result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT; + return Status::OK(); + } + + const auto& repeated_group = static_cast(repeated_node); + if (repeated_group.field_count() == 0) { + return Status::NotSupported("Unsupported parquet LIST element layout for column {}", + list_group.name()); + } + const bool repeated_group_has_logical_annotation = has_logical_annotation(repeated_group); + if (repeated_group.field_count() > 1 || + has_structural_list_name(list_group.name(), repeated_group.name())) { + result->element_node = &repeated_node; + result->element_context = result->repeated_context; + result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT; + return Status::OK(); + } + if (repeated_group_has_logical_annotation) { + result->element_node = &repeated_node; + result->element_context = result->repeated_context; + result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT; + return Status::OK(); + } + + const auto& only_child = *repeated_group.field(0); + if (only_child.is_repeated()) { + result->element_node = &repeated_node; + result->element_context = result->repeated_context; + result->element_build_mode = SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT; + return Status::OK(); + } + + result->element_node = &only_child; + result->element_context = child_context(result->repeated_context, only_child, 0); + return Status::OK(); +} + +// Resolves the repeated entry group of a MAP/MAP_KEY_VALUE node. Unlike LIST, MAP has no supported +// two-level form in this reader: Doris requires a repeated group with exactly key and value +// children, then folds that physical entry group out of ParquetColumnSchema. Some external writers +// emit optional MAP keys even though standard Parquet MAP keys are required; keep the key's +// definition levels and expose it as nullable for compatibility with the old reader. +Status resolve_map_entry_group(const ::parquet::schema::GroupNode& map_group, + const SchemaBuildContext& map_context, MapEntryResolution* result) { + if (result == nullptr) { + return Status::InvalidArgument("result is null"); + } + if (map_group.field_count() != 1) { + return Status::NotSupported("Unsupported parquet MAP encoding for column {}", + map_group.name()); + } + const auto& entry_node = *map_group.field(0); + if (!entry_node.is_repeated()) { + return Status::NotSupported("Unsupported parquet MAP encoding for column {}", + map_group.name()); + } + if (entry_node.is_primitive()) { + return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}", + map_group.name()); + } + const auto& entry_group = static_cast(entry_node); + if (entry_group.field_count() != 2) { + return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}", + map_group.name()); + } + // The Parquet logical MAP spec requires key to be REQUIRED. Some legacy/Hive-written files + // still mark the key field OPTIONAL even when all actual keys are non-null, for example: + // optional group t_map_varchar (MAP) { + // repeated group key_value { + // optional binary key (STRING); + // optional binary value (STRING); + // } + // } + // Accept that schema here so compatible files can be read. MapColumnReader validates the + // materialized key column and rejects data that really contains null map keys. + result->entry_group = &entry_group; + result->entry_context = child_context(map_context, entry_node, 0); + return Status::OK(); +} + +Status build_node_schema_with_mode(const ::parquet::SchemaDescriptor& schema, + const ::parquet::schema::Node& node, + const SchemaBuildContext& context, + std::unique_ptr* result, + SchemaBuildMode mode); + +// Builds a semantic ARRAY schema for a bare repeated field. Arrow handles this in +// NodeToSchemaField()/GroupToSchemaField(); Doris needs the same compatibility behavior because +// protobuf and old parquet writers often encode repeated fields without a LIST annotation. +// Example: +// optional group event { +// repeated group links { +// optional binary url (UTF8); +// optional int32 rank; +// } +// } +// Doris exposes event.links as ARRAY>, not STRUCT. This keeps v2's +// file-local schema aligned with the old schema parser used by HDFS TVF schema fetching. +// When the repeated field appears inside an already resolved LIST element, only the nested repeated +// child should be wrapped: +// optional group a (LIST) { +// repeated group element { +// repeated int32 items; +// } +// } +// The outer LIST element is the repeated "element" group, and its repeated "items" child should be +// represented as a field of type ARRAY inside the struct element. +Status build_repeated_field_as_list_schema(const ::parquet::SchemaDescriptor& schema, + const ::parquet::schema::Node& repeated_node, + const SchemaBuildContext& repeated_context, + std::unique_ptr* result) { + if (result == nullptr) { + return Status::InvalidArgument("result is null"); + } + auto list_schema = std::make_unique(); + inherit_common_schema_state(repeated_node, repeated_context, list_schema.get()); + list_schema->kind = ParquetColumnSchemaKind::LIST; + list_schema->definition_level = repeated_context.definition_level; + list_schema->repetition_level = repeated_context.repetition_level; + list_schema->repeated_repetition_level = repeated_context.repeated_repetition_level; + + std::unique_ptr element_child; + RETURN_IF_ERROR(build_node_schema_with_mode(schema, repeated_node, repeated_context, + &element_child, + SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT)); + element_child->name = "element"; + list_schema->type = std::make_shared(element_child->type); + list_schema->children.push_back(std::move(element_child)); + propagate_child_levels(list_schema.get()); + *result = std::move(list_schema); + return Status::OK(); +} + +// Recursively builds ParquetColumnSchema for the given schema node and its children in Parquet +// file's metadata. NORMAL mode exposes bare repeated fields as ARRAY for legacy compatibility. +// REPEATED_NODE_AS_LIST_ELEMENT mode means the current repeated node was already selected as an +// enclosing LIST element, so only its nested bare repeated children should be wrapped. +Status build_node_schema_with_mode(const ::parquet::SchemaDescriptor& schema, + const ::parquet::schema::Node& node, + const SchemaBuildContext& context, + std::unique_ptr* result, + SchemaBuildMode mode) { + if (result == nullptr) { + return Status::InvalidArgument("result is null"); + } + if (mode == SchemaBuildMode::NORMAL && should_build_repeated_field_as_list(node)) { + return build_repeated_field_as_list_schema(schema, node, context, result); + } + + auto column_schema = std::make_unique(); + inherit_common_schema_state(node, context, column_schema.get()); + + if (node.is_primitive()) { + const int leaf_column_id = schema.ColumnIndex(node); + if (leaf_column_id < 0) { + return Status::InvalidArgument("Cannot find leaf column id for parquet column {}", + node.name()); + } + column_schema->kind = ParquetColumnSchemaKind::PRIMITIVE; + column_schema->leaf_column_id = leaf_column_id; + column_schema->descriptor = schema.Column(leaf_column_id); + if (column_schema->descriptor != nullptr) { + column_schema->max_definition_level = column_schema->descriptor->max_definition_level(); + column_schema->max_repetition_level = column_schema->descriptor->max_repetition_level(); + } + column_schema->type_descriptor = resolve_parquet_type(column_schema->descriptor); + column_schema->type = column_schema->type_descriptor.doris_type; + if (column_schema->type == nullptr) { + if (!column_schema->type_descriptor.unsupported_reason.empty()) { + return Status::NotSupported("Unsupported parquet column '{}': {}", node.name(), + column_schema->type_descriptor.unsupported_reason); + } + return Status::NotSupported("Unsupported parquet column type for column {}", + node.name()); + } + column_schema->type = node.is_optional() + ? make_nullable(remove_nullable(column_schema->type)) + : remove_nullable(column_schema->type); + *result = std::move(column_schema); + return Status::OK(); + } + + const auto& group = static_cast(node); + if (is_list_node(node) && mode != SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT) { + if (mode == SchemaBuildMode::NORMAL && node.is_repeated()) { + return Status::NotSupported("Unsupported repeated parquet LIST column {}", node.name()); + } + column_schema->kind = ParquetColumnSchemaKind::LIST; + ListElementResolution list_element; + RETURN_IF_ERROR(resolve_list_element_node(group, context, &list_element)); + column_schema->definition_level = list_element.repeated_context.definition_level; + column_schema->repetition_level = list_element.repeated_context.repetition_level; + column_schema->repeated_repetition_level = + list_element.repeated_context.repeated_repetition_level; + std::unique_ptr child; + RETURN_IF_ERROR(build_node_schema_with_mode(schema, *list_element.element_node, + list_element.element_context, &child, + list_element.element_build_mode)); + child->name = "element"; + column_schema->type = + nullable_if_needed(std::make_shared(child->type), node); + column_schema->children.push_back(std::move(child)); + propagate_child_levels(column_schema.get()); + *result = std::move(column_schema); + return Status::OK(); + } + + if (is_map_node(node) && mode != SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT) { + if (mode == SchemaBuildMode::NORMAL && node.is_repeated()) { + return Status::NotSupported("Unsupported repeated parquet MAP column {}", node.name()); + } + column_schema->kind = ParquetColumnSchemaKind::MAP; + MapEntryResolution map_entry; + RETURN_IF_ERROR(resolve_map_entry_group(group, context, &map_entry)); + column_schema->definition_level = map_entry.entry_context.definition_level; + column_schema->repetition_level = map_entry.entry_context.repetition_level; + column_schema->repeated_repetition_level = + map_entry.entry_context.repeated_repetition_level; + for (int child_idx = 0; child_idx < map_entry.entry_group->field_count(); ++child_idx) { + std::unique_ptr child; + RETURN_IF_ERROR(build_node_schema_with_mode( + schema, *map_entry.entry_group->field(child_idx), + child_context(map_entry.entry_context, *map_entry.entry_group->field(child_idx), + child_idx), + &child, SchemaBuildMode::NORMAL)); + child->name = child_idx == 0 ? "key" : "value"; + column_schema->children.push_back(std::move(child)); + } + if (column_schema->children.size() != 2) { + return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}", + node.name()); + } + auto key_type = make_nullable(column_schema->children[0]->type); + auto value_type = make_nullable(column_schema->children[1]->type); + column_schema->type = + nullable_if_needed(std::make_shared(key_type, value_type), node); + propagate_child_levels(column_schema.get()); + *result = std::move(column_schema); + return Status::OK(); + } + + column_schema->kind = ParquetColumnSchemaKind::STRUCT; + DataTypes child_types; + Strings child_names; + child_types.reserve(group.field_count()); + child_names.reserve(group.field_count()); + for (int child_idx = 0; child_idx < group.field_count(); ++child_idx) { + const auto& child_node = *group.field(child_idx); + std::unique_ptr child; + const auto child_ctx = child_context(context, child_node, child_idx); + if (should_build_repeated_field_as_list(child_node)) { + RETURN_IF_ERROR( + build_repeated_field_as_list_schema(schema, child_node, child_ctx, &child)); + } else { + RETURN_IF_ERROR(build_node_schema_with_mode(schema, child_node, child_ctx, &child, + SchemaBuildMode::NORMAL)); + } + child_types.push_back(make_nullable(child->type)); + child_names.push_back(child->name); + column_schema->children.push_back(std::move(child)); + } + column_schema->type = + nullable_if_needed(std::make_shared(child_types, child_names), node); + propagate_child_levels(column_schema.get()); + *result = std::move(column_schema); + return Status::OK(); +} + +Status build_node_schema(const ::parquet::SchemaDescriptor& schema, + const ::parquet::schema::Node& node, const SchemaBuildContext& context, + std::unique_ptr* result) { + return build_node_schema_with_mode(schema, node, context, result, SchemaBuildMode::NORMAL); +} + +} // namespace + +Status build_parquet_column_schema(const ::parquet::SchemaDescriptor& schema, + std::vector>* fields) { + if (fields == nullptr) { + return Status::InvalidArgument("fields is null"); + } + fields->clear(); + const auto* root = schema.group_node(); + if (root == nullptr) { + return Status::InvalidArgument("Parquet schema root is null"); + } + fields->reserve(root->field_count()); + for (int field_idx = 0; field_idx < root->field_count(); ++field_idx) { + std::unique_ptr field; + SchemaBuildContext context; + RETURN_IF_ERROR(build_node_schema( + schema, *root->field(field_idx), + child_context(context, *root->field(field_idx), field_idx), &field)); + fields->push_back(std::move(field)); + } + return Status::OK(); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_column_schema.h b/be/src/format_v2/parquet/parquet_column_schema.h new file mode 100644 index 00000000000000..1fb7262aabde6f --- /dev/null +++ b/be/src/format_v2/parquet/parquet_column_schema.h @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "format_v2/parquet/parquet_type.h" + +namespace parquet { +class ColumnDescriptor; +class SchemaDescriptor; +} // namespace parquet + +namespace doris::format::parquet { + +enum class ParquetColumnSchemaKind { + PRIMITIVE, // primitive leaf -> ScalarColumnReader + STRUCT, // struct -> StructColumnReader + LIST, // array -> ListColumnReader + MAP, // map -> MapColumnReader +}; + +// ============================================================================ +// ============================================================================ +// ============================================================================ +struct ParquetColumnSchema { + int local_id = -1; + + int parquet_field_id = -1; + + std::string name; + + DataTypePtr type = nullptr; + + int leaf_column_id = -1; + + ParquetTypeDescriptor type_descriptor {}; + + ParquetColumnSchemaKind kind = ParquetColumnSchemaKind::PRIMITIVE; + + const ::parquet::ColumnDescriptor* descriptor = nullptr; + + // ======== Dremel Levels ======== + + int16_t max_definition_level = 0; + int16_t max_repetition_level = 0; + + int16_t nullable_definition_level = 0; + + int16_t definition_level = 0; + int16_t repetition_level = 0; + + int16_t repeated_ancestor_definition_level = 0; + + int16_t repeated_repetition_level = 0; + + std::vector> children {}; +}; + +Status build_parquet_column_schema(const ::parquet::SchemaDescriptor& schema, + std::vector>* fields); + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_file_context.cpp b/be/src/format_v2/parquet/parquet_file_context.cpp new file mode 100644 index 00000000000000..dd9bf6aa9545f3 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_file_context.cpp @@ -0,0 +1,442 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_file_context.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/check.h" +#include "common/config.h" +#include "io/file_factory.h" +#include "io/fs/file_reader.h" +#include "storage/cache/page_cache.h" +#include "util/slice.h" + +namespace doris::format::parquet { + +namespace detail { + +std::vector plan_page_cache_range_read( + int64_t position, int64_t nbytes, const std::vector& cached_ranges) { + if (position < 0 || nbytes <= 0) { + return {}; + } + + std::vector ranges; + ranges.reserve(cached_ranges.size()); + const int64_t request_end = position + nbytes; + for (const auto& range : cached_ranges) { + if (range.size > 0 && range.offset < request_end && position < range.end_offset()) { + ranges.push_back(range); + } + } + std::sort(ranges.begin(), ranges.end(), [](const auto& lhs, const auto& rhs) { + if (lhs.offset != rhs.offset) { + return lhs.offset < rhs.offset; + } + return lhs.size > rhs.size; + }); + + std::vector plan; + int64_t cursor = position; + while (cursor < request_end) { + // At each cursor position, choose the cached range that already covers the cursor and + // extends farthest to the right. This handles both adjacent ranges and overlapping + // ranges. If no range covers the current cursor, there is a gap and the request must + // miss as a whole. + auto best = ranges.end(); + int64_t best_end = cursor; + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + const int64_t cached_end = it->end_offset(); + if (it->offset <= cursor && cursor < cached_end && cached_end > best_end) { + best = it; + best_end = cached_end; + } + } + if (best == ranges.end()) { + return {}; + } + const int64_t copy_size = std::min(best_end, request_end) - cursor; + ParquetPageCacheReadPlanEntry entry; + entry.cached_range = *best; + entry.copy_offset_in_cache = cursor - best->offset; + entry.output_offset = cursor - position; + entry.copy_size = copy_size; + plan.push_back(entry); + cursor += copy_size; + } + return plan; +} + +} // namespace detail + +namespace { + +// StoragePageCache only supports exact-key lookup. Keep lightweight range metadata here so later +// Arrow ReadAt requests can reuse cached bytes when their requested ranges are subsets of, or are +// fully covered by, previously cached ranges. Stale metadata is pruned on lookup. +std::mutex cached_page_range_index_mutex; +std::unordered_map> cached_page_range_index; +constexpr size_t MAX_CACHED_PAGE_RANGE_FILES = 4096; +constexpr size_t MAX_CACHED_PAGE_RANGES_PER_FILE = 65536; + +void register_cached_page_range(const std::string& file_key, int64_t position, int64_t nbytes) { + DORIS_CHECK(nbytes > 0); + std::lock_guard lock(cached_page_range_index_mutex); + if (cached_page_range_index.find(file_key) == cached_page_range_index.end() && + cached_page_range_index.size() >= MAX_CACHED_PAGE_RANGE_FILES) { + cached_page_range_index.erase(cached_page_range_index.begin()); + } + auto& ranges = cached_page_range_index[file_key]; + auto it = std::find_if(ranges.begin(), ranges.end(), [&](const ParquetPageCacheRange& range) { + return range.offset == position && range.size == nbytes; + }); + if (it == ranges.end()) { + if (ranges.size() >= MAX_CACHED_PAGE_RANGES_PER_FILE) { + ranges.erase(ranges.begin()); + } + ranges.push_back(ParquetPageCacheRange {position, nbytes}); + } +} + +void unregister_cached_page_range(const std::string& file_key, + const ParquetPageCacheRange& stale_range) { + std::lock_guard lock(cached_page_range_index_mutex); + auto it = cached_page_range_index.find(file_key); + if (it == cached_page_range_index.end()) { + return; + } + auto& ranges = it->second; + ranges.erase(std::remove_if(ranges.begin(), ranges.end(), + [&](const ParquetPageCacheRange& range) { + return range.offset == stale_range.offset && + range.size == stale_range.size; + }), + ranges.end()); + if (ranges.empty()) { + cached_page_range_index.erase(it); + } +} + +std::vector cached_page_ranges_for_file(const std::string& file_key) { + std::lock_guard lock(cached_page_range_index_mutex); + auto it = cached_page_range_index.find(file_key); + if (it == cached_page_range_index.end()) { + return {}; + } + return it->second; +} + +std::string build_page_cache_file_key(const io::FileReader& file_reader, + const io::FileDescription& file_description) { + const int64_t mtime = + file_description.mtime != 0 ? file_description.mtime : file_reader.mtime(); + if (mtime == 0) { + // StoragePageCache is process-global. A key with only path + unknown mtime can outlive a + // rewritten local test file, or any external file whose version was not propagated. Disable + // v2 parquet page cache until the scan descriptor carries a stable object version. + return {}; + } + const int64_t file_size = file_description.file_size >= 0 + ? file_description.file_size + : static_cast(file_reader.size()); + return fmt::format("{}::{}::mtime={}::size={}", file_description.fs_name, + file_reader.path().native(), mtime, file_size); +} + +class DorisRandomAccessFile final : public arrow::io::RandomAccessFile { +public: + DorisRandomAccessFile(io::FileReaderSPtr file_reader, io::IOContext* io_ctx, + bool enable_page_cache, std::string page_cache_file_key) + : _file_reader(std::move(file_reader)), + _io_ctx(io_ctx), + _enable_page_cache(enable_page_cache), + _page_cache_file_key(std::move(page_cache_file_key)) { + DORIS_CHECK(_file_reader != nullptr); + set_mode(arrow::io::FileMode::READ); + } + + arrow::Status Close() override { + _closed = true; + return arrow::Status::OK(); + } + + bool closed() const override { return _closed; } + + arrow::Result Tell() const override { return _pos; } + + arrow::Status Seek(int64_t position) override { + if (position < 0) { + return arrow::Status::Invalid("negative seek position"); + } + _pos = position; + return arrow::Status::OK(); + } + + arrow::Result GetSize() override { + if (!_file_reader) { + return arrow::Status::IOError("Doris file reader is not open"); + } + return static_cast(_file_reader->size()); + } + + arrow::Result Read(int64_t nbytes, void* out) override { + ARROW_ASSIGN_OR_RAISE(auto bytes_read, ReadAt(_pos, nbytes, out)); + _pos += bytes_read; + return bytes_read; + } + + arrow::Result> Read(int64_t nbytes) override { + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes)); + ARROW_ASSIGN_OR_RAISE(auto bytes_read, Read(nbytes, buffer->mutable_data())); + ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false)); + buffer->ZeroPadding(); + return buffer; + } + + arrow::Result ReadAt(int64_t position, int64_t nbytes, void* out) override { + if (!_file_reader) { + return arrow::Status::IOError("Doris file reader is not open"); + } + if (position < 0 || nbytes < 0) { + return arrow::Status::Invalid("negative read position or length"); + } + if (try_read_from_page_cache(position, nbytes, out)) { + return nbytes; + } + size_t bytes_read = 0; + Status st = _file_reader->read_at( + static_cast(position), + Slice(static_cast(out), static_cast(nbytes)), &bytes_read, + _io_ctx); + if (!st.ok()) { + return arrow::Status::IOError(st.to_string_no_stack()); + } + insert_page_cache(position, nbytes, out, bytes_read); + return static_cast(bytes_read); + } + + arrow::Result> ReadAt(int64_t position, + int64_t nbytes) override { + ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes)); + ARROW_ASSIGN_OR_RAISE(auto bytes_read, ReadAt(position, nbytes, buffer->mutable_data())); + ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false)); + buffer->ZeroPadding(); + return buffer; + } + + void register_page_cache_ranges(std::vector ranges) { + std::lock_guard lock(_page_cache_mutex); + _page_cache_ranges = std::move(ranges); + } + + ParquetPageCacheStats page_cache_stats() const { + std::lock_guard lock(_page_cache_mutex); + return _page_cache_stats; + } + +private: + bool page_cache_enabled() const { + return _enable_page_cache && !config::disable_storage_page_cache && + StoragePageCache::instance() != nullptr && !_page_cache_file_key.empty(); + } + + bool range_in_page_cache_scope(int64_t position, int64_t nbytes) const { + if (nbytes <= 0) { + return false; + } + const int64_t end = position + nbytes; + for (const auto& range : _page_cache_ranges) { + const int64_t range_end = range.offset + range.size; + if (position >= range.offset && end <= range_end) { + return true; + } + } + return false; + } + + StoragePageCache::CacheKey page_cache_key(int64_t position, int64_t nbytes) const { + return StoragePageCache::CacheKey(_page_cache_file_key, + static_cast(position + nbytes), position); + } + + bool copy_cached_range(const ParquetPageCacheRange& cached_range, int64_t copy_position, + int64_t copy_size, void* out, int64_t output_offset) { + PageCacheHandle handle; + if (!StoragePageCache::instance()->lookup( + page_cache_key(cached_range.offset, cached_range.size), &handle, + segment_v2::DATA_PAGE)) { + unregister_cached_page_range(_page_cache_file_key, cached_range); + return false; + } + Slice cached = handle.data(); + const int64_t cache_offset = copy_position - cached_range.offset; + DORIS_CHECK(cache_offset >= 0); + DORIS_CHECK(cached.size >= static_cast(cache_offset + copy_size)); + memcpy(static_cast(out) + output_offset, cached.data + cache_offset, + static_cast(copy_size)); + return true; + } + + bool try_read_from_cached_ranges(int64_t position, int64_t nbytes, void* out) { + auto plan = detail::plan_page_cache_range_read( + position, nbytes, cached_page_ranges_for_file(_page_cache_file_key)); + if (plan.empty()) { + return false; + } + for (const auto& entry : plan) { + if (!copy_cached_range(entry.cached_range, + entry.cached_range.offset + entry.copy_offset_in_cache, + entry.copy_size, out, entry.output_offset)) { + return false; + } + } + return true; + } + + bool try_read_from_page_cache(int64_t position, int64_t nbytes, void* out) { + std::lock_guard lock(_page_cache_mutex); + if (!page_cache_enabled() || !range_in_page_cache_scope(position, nbytes)) { + return false; + } + ++_page_cache_stats.read_count; + // Fast path: Arrow issues the same ReadAt(offset, size) again, so the exact + // StoragePageCache key matches. + // Fallback path: Arrow may read a different but related byte range on another scan. + // Examples: + // - Current request [120, 150) can be served from cached [100, 200) by copying the + // 30-byte subset starting at cached offset 20. + // - Current request [100, 260) can be served by stitching cached [100, 180) and + // [180, 260). If any middle span is missing, it is a miss and the file reader fills + // the whole request from storage. + if (!copy_cached_range(ParquetPageCacheRange {position, nbytes}, position, nbytes, out, + 0) && + !try_read_from_cached_ranges(position, nbytes, out)) { + ++_page_cache_stats.miss_count; + return false; + } + ++_page_cache_stats.hit_count; + ++_page_cache_stats.compressed_hit_count; + return true; + } + + void insert_page_cache(int64_t position, int64_t nbytes, const void* data, size_t bytes_read) { + std::lock_guard lock(_page_cache_mutex); + if (!page_cache_enabled() || !range_in_page_cache_scope(position, nbytes) || + bytes_read != static_cast(nbytes)) { + return; + } + auto* page = new DataPage(bytes_read, true, segment_v2::DATA_PAGE); + memcpy(page->data(), data, bytes_read); + PageCacheHandle handle; + StoragePageCache::instance()->insert(page_cache_key(position, nbytes), page, &handle, + segment_v2::DATA_PAGE); + register_cached_page_range(_page_cache_file_key, position, nbytes); + ++_page_cache_stats.write_count; + ++_page_cache_stats.compressed_write_count; + } + + io::FileReaderSPtr _file_reader; + io::IOContext* _io_ctx = nullptr; + int64_t _pos = 0; + bool _closed = false; + bool _enable_page_cache = false; + std::string _page_cache_file_key; + mutable std::mutex _page_cache_mutex; + std::vector _page_cache_ranges; + ParquetPageCacheStats _page_cache_stats; +}; + +} // namespace + +Status arrow_status_to_doris_status(const arrow::Status& status) { + if (status.ok()) { + return Status::OK(); + } + if (status.IsIOError()) { + return Status::IOError(status.ToString()); + } + if (status.IsInvalid()) { + return Status::InvalidArgument(status.ToString()); + } + return Status::InternalError(status.ToString()); +} + +Status ParquetFileContext::open(io::FileReaderSPtr input_file_reader, io::IOContext* io_ctx, + bool enable_page_cache, + const io::FileDescription& file_description) { + DORIS_CHECK(input_file_reader != nullptr); + auto page_cache_file_key = build_page_cache_file_key(*input_file_reader, file_description); + arrow_file = std::make_shared(std::move(input_file_reader), io_ctx, + enable_page_cache, + std::move(page_cache_file_key)); + try { + // TODO: Cache parquet metadata in file system layer to avoid repeated metadata read for same file. + this->file_reader = ::parquet::ParquetFileReader::Open( + arrow_file, ::parquet::default_reader_properties()); + metadata = this->file_reader->metadata(); + schema = metadata != nullptr ? metadata->schema() : nullptr; + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to open parquet file: {}", e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to open parquet file: {}", e.what()); + } + + if (metadata == nullptr || schema == nullptr) { + return Status::Corruption("Failed to read parquet metadata"); + } + return Status::OK(); +} + +void ParquetFileContext::register_page_cache_ranges(std::vector ranges) { + DORIS_CHECK(arrow_file != nullptr); + static_cast(arrow_file.get()) + ->register_page_cache_ranges(std::move(ranges)); +} + +ParquetPageCacheStats ParquetFileContext::page_cache_stats() const { + if (arrow_file == nullptr) { + return {}; + } + return static_cast(arrow_file.get())->page_cache_stats(); +} + +Status ParquetFileContext::close() { + if (file_reader != nullptr) { + try { + file_reader->Close(); + } catch (const std::exception&) { + } + } + if (arrow_file != nullptr) { + static_cast(arrow_status_to_doris_status(arrow_file->Close())); + } + file_reader.reset(); + arrow_file.reset(); + return Status::OK(); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_file_context.h b/be/src/format_v2/parquet/parquet_file_context.h new file mode 100644 index 00000000000000..8dedf732c8fde0 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_file_context.h @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include +#include +#include + +#include "common/status.h" +#include "io/fs/file_reader.h" + +namespace doris::io { +struct FileDescription; +} // namespace doris::io + +namespace doris::format::parquet { + +struct ParquetPageCacheRange { + int64_t offset = 0; + int64_t size = 0; + + int64_t end_offset() const { return offset + size; } +}; + +struct ParquetPageCacheReadPlanEntry { + // The exact cached StoragePageCache entry. The final cache key is still exact-range based: + // file key + cached_range.end_offset() + cached_range.offset. + ParquetPageCacheRange cached_range; + // Byte offset inside cached_range to start copying from. + int64_t copy_offset_in_cache = 0; + // Byte offset inside the current ReadAt output buffer to start writing to. + int64_t output_offset = 0; + int64_t copy_size = 0; +}; + +struct ParquetPageCacheStats { + int64_t read_count = 0; + int64_t write_count = 0; + int64_t compressed_write_count = 0; + int64_t hit_count = 0; + int64_t miss_count = 0; + int64_t compressed_hit_count = 0; +}; + +namespace detail { + +// Build the copy plan for a ReadAt(position, nbytes) request from the range metadata of +// previously cached entries. +// StoragePageCache cannot do range lookup by itself; it can only lookup an exact key. The +// caller therefore keeps lightweight cached range metadata and uses this function to decide +// which exact cache entries to fetch and which byte spans to copy. +// Examples: +// 1. Subset hit: +// request [120, 150), cached [100, 200) -> copy 30 bytes from cached offset 20. +// 2. Superset hit covered by multiple cached entries: +// request [100, 260), cached [100, 180) and [180, 260) +// -> two copies: [100, 180) to output offset 0, [180, 260) to output offset 80. +// 3. Partial overlap is a miss: +// request [100, 260), cached [100, 180) only -> empty plan, caller reads from file. +std::vector plan_page_cache_range_read( + int64_t position, int64_t nbytes, const std::vector& cached_ranges); + +} // namespace detail + +struct ParquetFileContext { + std::shared_ptr arrow_file; // Arrow wrapper for Doris FileReader + std::unique_ptr<::parquet::ParquetFileReader> file_reader; // Arrow Parquet file parser + std::shared_ptr<::parquet::FileMetaData> metadata; // footer metadata (RowGroup information) + const ::parquet::SchemaDescriptor* schema = nullptr; // physical leaf column schema + + Status open(io::FileReaderSPtr input_file_reader, io::IOContext* io_ctx, bool enable_page_cache, + const io::FileDescription& file_description); + // Register file ranges that belong to selected Parquet column chunks. Arrow still owns page + // decoding, so v2 caches the serialized bytes read inside these ranges and excludes + // footer/metadata reads that happen before registration. + void register_page_cache_ranges(std::vector ranges); + ParquetPageCacheStats page_cache_stats() const; + Status close(); +}; + +Status arrow_status_to_doris_status(const arrow::Status& status); + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_profile.cpp b/be/src/format_v2/parquet/parquet_profile.cpp new file mode 100644 index 00000000000000..79f979ea0cf1b8 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_profile.cpp @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_profile.h" + +#include "format_v2/parquet/parquet_statistics.h" + +namespace doris::format::parquet { + +void ParquetProfile::init(RuntimeProfile* profile) { + if (profile == nullptr) { + return; + } + + static const char* parquet_profile = "ParquetReader"; + ADD_TIMER_WITH_LEVEL(profile, parquet_profile, 1); + + filtered_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsFiltered", TUnit::UNIT, + parquet_profile, 1); + filtered_row_groups_by_min_max = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "RowGroupsFilteredByMinMax", TUnit::UNIT, parquet_profile, 1); + filtered_row_groups_by_dictionary = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "RowGroupsFilteredByDictionary", TUnit::UNIT, parquet_profile, 1); + filtered_row_groups_by_bloom_filter = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "RowGroupsFilteredByBloomFilter", TUnit::UNIT, parquet_profile, 1); + to_read_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsReadNum", TUnit::UNIT, + parquet_profile, 1); + total_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsTotalNum", TUnit::UNIT, + parquet_profile, 1); + selected_row_ranges = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRowRanges", TUnit::UNIT, + parquet_profile, 1); + filtered_group_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByGroup", TUnit::UNIT, + parquet_profile, 1); + filtered_page_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByPage", TUnit::UNIT, + parquet_profile, 1); + pages_skipped_by_data_page_filter = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "PagesSkippedByDataPageFilter", TUnit::UNIT, parquet_profile, 1); + data_page_filter_skip_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DataPageFilterSkipBytes", + TUnit::BYTES, parquet_profile, 1); + selected_rows = + ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRows", TUnit::UNIT, parquet_profile, 1); + rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowsFilteredByConjunct", + TUnit::UNIT, parquet_profile, 1); + total_batches = + ADD_CHILD_COUNTER_WITH_LEVEL(profile, "TotalBatches", TUnit::UNIT, parquet_profile, 1); + empty_selection_batches = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "EmptySelectionBatches", + TUnit::UNIT, parquet_profile, 1); + range_gap_skipped_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RangeGapSkippedRows", + TUnit::UNIT, parquet_profile, 1); + reader_read_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderReadRows", TUnit::UNIT, + parquet_profile, 1); + reader_skip_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSkipRows", TUnit::UNIT, + parquet_profile, 1); + reader_select_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSelectRows", TUnit::UNIT, + parquet_profile, 1); + arrow_read_records_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "ArrowReadRecordsTime", parquet_profile, 1); + materialization_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "MaterializationTime", parquet_profile, 1); + lazy_read_filtered_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByLazyRead", + TUnit::UNIT, parquet_profile, 1); + filtered_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredBytes", TUnit::BYTES, + parquet_profile, 1); + raw_rows_read = + ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RawRowsRead", TUnit::UNIT, parquet_profile, 1); + column_read_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ColumnReadTime", parquet_profile, 1); + parse_meta_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseMetaTime", parquet_profile, 1); + parse_footer_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseFooterTime", parquet_profile, 1); + file_reader_create_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "FileReaderCreateTime", parquet_profile, 1); + open_file_num = + ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FileNum", TUnit::UNIT, parquet_profile, 1); + page_index_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "PageIndexReadCalls", TUnit::UNIT, 1); + page_index_filter_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexFilterTime", parquet_profile, 1); + read_page_index_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexReadTime", parquet_profile, 1); + parse_page_index_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexParseTime", parquet_profile, 1); + row_group_filter_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "RowGroupFilterTime", parquet_profile, 1); + file_footer_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterReadCalls", TUnit::UNIT, 1); + file_footer_hit_cache = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterHitCache", TUnit::UNIT, 1); + decompress_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecompressTime", parquet_profile, 1); + decompress_cnt = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DecompressCount", TUnit::UNIT, + parquet_profile, 1); + page_read_counter = + ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageReadCount", TUnit::UNIT, parquet_profile, 1); + page_cache_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheWriteCount", + TUnit::UNIT, parquet_profile, 1); + page_cache_compressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "PageCacheCompressedWriteCount", TUnit::UNIT, parquet_profile, 1); + page_cache_decompressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "PageCacheDecompressedWriteCount", TUnit::UNIT, parquet_profile, 1); + page_cache_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheHitCount", TUnit::UNIT, + parquet_profile, 1); + page_cache_missing_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheMissingCount", + TUnit::UNIT, parquet_profile, 1); + page_cache_compressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "PageCacheCompressedHitCount", TUnit::UNIT, parquet_profile, 1); + page_cache_decompressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL( + profile, "PageCacheDecompressedHitCount", TUnit::UNIT, parquet_profile, 1); + decode_header_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderDecodeTime", parquet_profile, 1); + read_page_header_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderReadTime", parquet_profile, 1); + decode_value_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeValueTime", parquet_profile, 1); + decode_dict_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeDictTime", parquet_profile, 1); + decode_level_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeLevelTime", parquet_profile, 1); + decode_null_map_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeNullMapTime", parquet_profile, 1); + skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SkipPageHeaderNum", TUnit::UNIT, + parquet_profile, 1); + parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ParsePageHeaderNum", TUnit::UNIT, + parquet_profile, 1); + predicate_filter_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "PredicateFilterTime", parquet_profile, 1); + dict_filter_rewrite_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "DictFilterRewriteTime", parquet_profile, 1); + convert_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ConvertTime", parquet_profile, 1); + bloom_filter_read_time = + ADD_CHILD_TIMER_WITH_LEVEL(profile, "BloomFilterReadTime", parquet_profile, 1); +} + +void ParquetProfile::update_pruning_stats(const ParquetPruningStats& pruning_stats) const { + COUNTER_UPDATE(filtered_row_groups, + pruning_stats.total_row_groups - pruning_stats.selected_row_groups); + COUNTER_UPDATE(filtered_row_groups_by_min_max, pruning_stats.filtered_row_groups_by_statistics); + COUNTER_UPDATE(filtered_row_groups_by_dictionary, + pruning_stats.filtered_row_groups_by_dictionary); + COUNTER_UPDATE(filtered_row_groups_by_bloom_filter, + pruning_stats.filtered_row_groups_by_bloom_filter); + COUNTER_UPDATE(to_read_row_groups, pruning_stats.selected_row_groups); + COUNTER_UPDATE(total_row_groups, pruning_stats.total_row_groups); + COUNTER_UPDATE(selected_row_ranges, pruning_stats.selected_row_ranges); + COUNTER_UPDATE(filtered_group_rows, pruning_stats.filtered_group_rows); + COUNTER_UPDATE(filtered_page_rows, pruning_stats.filtered_page_rows); + COUNTER_UPDATE(page_index_read_calls, pruning_stats.page_index_read_calls); + COUNTER_UPDATE(bloom_filter_read_time, pruning_stats.bloom_filter_read_time); + COUNTER_UPDATE(row_group_filter_time, pruning_stats.row_group_filter_time); + COUNTER_UPDATE(page_index_filter_time, pruning_stats.page_index_filter_time); + COUNTER_UPDATE(read_page_index_time, pruning_stats.read_page_index_time); +} + +ParquetPageSkipProfile ParquetProfile::page_skip_profile() const { + return { + .skipped_pages = pages_skipped_by_data_page_filter, + .skipped_bytes = data_page_filter_skip_bytes, + }; +} + +ParquetColumnReaderProfile ParquetProfile::column_reader_profile() const { + return { + .reader_read_rows = reader_read_rows, + .reader_skip_rows = reader_skip_rows, + .reader_select_rows = reader_select_rows, + .arrow_read_records_time = arrow_read_records_time, + .materialization_time = materialization_time, + }; +} + +ParquetScanProfile ParquetProfile::scan_profile() const { + return { + .raw_rows_read = raw_rows_read, + .selected_rows = selected_rows, + .rows_filtered_by_conjunct = rows_filtered_by_conjunct, + .lazy_read_filtered_rows = lazy_read_filtered_rows, + .total_batches = total_batches, + .empty_selection_batches = empty_selection_batches, + .range_gap_skipped_rows = range_gap_skipped_rows, + .column_read_time = column_read_time, + .predicate_filter_time = predicate_filter_time, + .column_reader_profile = column_reader_profile(), + }; +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_profile.h b/be/src/format_v2/parquet/parquet_profile.h new file mode 100644 index 00000000000000..8f7623527ca707 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_profile.h @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "runtime/runtime_profile.h" + +namespace doris::format::parquet { + +struct ParquetPruningStats; + +// ============================================================================ +// ============================================================================ +struct ParquetPageSkipProfile { + RuntimeProfile::Counter* skipped_pages = nullptr; // number of data pages skipped by page index + RuntimeProfile::Counter* skipped_bytes = nullptr; // compressed bytes skipped +}; + +// ============================================================================ +// ============================================================================ +struct ParquetColumnReaderProfile { + RuntimeProfile::Counter* reader_read_rows = nullptr; // rows read by read() + RuntimeProfile::Counter* reader_skip_rows = nullptr; // rows skipped by skip() + RuntimeProfile::Counter* reader_select_rows = nullptr; // rows selected by select() + RuntimeProfile::Counter* arrow_read_records_time = nullptr; // Arrow RecordReader time (ns) + RuntimeProfile::Counter* materialization_time = nullptr; // value materialization time (ns) +}; + +// ============================================================================ +// ============================================================================ +struct ParquetScanProfile { + RuntimeProfile::Counter* raw_rows_read = nullptr; // raw rows read from RecordReader + RuntimeProfile::Counter* selected_rows = nullptr; // rows selected after conjunct filtering + RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr; // rows filtered by conjuncts + RuntimeProfile::Counter* lazy_read_filtered_rows = + nullptr; // rows avoided by late materialization + RuntimeProfile::Counter* total_batches = nullptr; // total batch count + RuntimeProfile::Counter* empty_selection_batches = + nullptr; // empty batches after full filtering + RuntimeProfile::Counter* range_gap_skipped_rows = nullptr; // rows skipped by range gaps + RuntimeProfile::Counter* column_read_time = nullptr; // column read time (ns) + RuntimeProfile::Counter* predicate_filter_time = nullptr; // predicate filter time (ns) + ParquetColumnReaderProfile column_reader_profile; // nested column read statistics +}; + +// ============================================================================ +// ============================================================================ +// ============================================================================ +struct ParquetProfile { + void init(RuntimeProfile* profile); + void update_pruning_stats(const ParquetPruningStats& pruning_stats) const; + + ParquetPageSkipProfile page_skip_profile() const; + ParquetColumnReaderProfile column_reader_profile() const; + ParquetScanProfile scan_profile() const; + + RuntimeProfile::Counter* filtered_row_groups = nullptr; + RuntimeProfile::Counter* filtered_row_groups_by_min_max = nullptr; + RuntimeProfile::Counter* filtered_row_groups_by_dictionary = nullptr; + RuntimeProfile::Counter* filtered_row_groups_by_bloom_filter = nullptr; + RuntimeProfile::Counter* to_read_row_groups = nullptr; + RuntimeProfile::Counter* total_row_groups = nullptr; + RuntimeProfile::Counter* selected_row_ranges = nullptr; + RuntimeProfile::Counter* filtered_group_rows = nullptr; + RuntimeProfile::Counter* filtered_page_rows = nullptr; + + // ======== Page Skip ======== + RuntimeProfile::Counter* pages_skipped_by_data_page_filter = nullptr; + RuntimeProfile::Counter* data_page_filter_skip_bytes = nullptr; + + RuntimeProfile::Counter* selected_rows = nullptr; + RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr; + RuntimeProfile::Counter* total_batches = nullptr; + RuntimeProfile::Counter* empty_selection_batches = nullptr; + RuntimeProfile::Counter* range_gap_skipped_rows = nullptr; + + // ======== Column Reader ======== + RuntimeProfile::Counter* reader_read_rows = nullptr; + RuntimeProfile::Counter* reader_skip_rows = nullptr; + RuntimeProfile::Counter* reader_select_rows = nullptr; + RuntimeProfile::Counter* arrow_read_records_time = nullptr; + RuntimeProfile::Counter* materialization_time = nullptr; + + RuntimeProfile::Counter* lazy_read_filtered_rows = nullptr; + RuntimeProfile::Counter* filtered_bytes = nullptr; + RuntimeProfile::Counter* raw_rows_read = nullptr; + RuntimeProfile::Counter* column_read_time = nullptr; + + RuntimeProfile::Counter* parse_meta_time = nullptr; + RuntimeProfile::Counter* parse_footer_time = nullptr; + RuntimeProfile::Counter* file_reader_create_time = nullptr; + RuntimeProfile::Counter* open_file_num = nullptr; + RuntimeProfile::Counter* file_footer_read_calls = nullptr; + RuntimeProfile::Counter* file_footer_hit_cache = nullptr; + + RuntimeProfile::Counter* row_group_filter_time = nullptr; + RuntimeProfile::Counter* page_index_read_calls = nullptr; + RuntimeProfile::Counter* page_index_filter_time = nullptr; + RuntimeProfile::Counter* read_page_index_time = nullptr; + RuntimeProfile::Counter* parse_page_index_time = nullptr; + + RuntimeProfile::Counter* decompress_time = nullptr; + RuntimeProfile::Counter* decompress_cnt = nullptr; + RuntimeProfile::Counter* page_read_counter = nullptr; + RuntimeProfile::Counter* page_cache_write_counter = nullptr; + RuntimeProfile::Counter* page_cache_compressed_write_counter = nullptr; + RuntimeProfile::Counter* page_cache_decompressed_write_counter = nullptr; + RuntimeProfile::Counter* page_cache_hit_counter = nullptr; + RuntimeProfile::Counter* page_cache_missing_counter = nullptr; + RuntimeProfile::Counter* page_cache_compressed_hit_counter = nullptr; + RuntimeProfile::Counter* page_cache_decompressed_hit_counter = nullptr; + + RuntimeProfile::Counter* decode_header_time = nullptr; + RuntimeProfile::Counter* read_page_header_time = nullptr; + RuntimeProfile::Counter* decode_value_time = nullptr; + RuntimeProfile::Counter* decode_dict_time = nullptr; + RuntimeProfile::Counter* decode_level_time = nullptr; + RuntimeProfile::Counter* decode_null_map_time = nullptr; + RuntimeProfile::Counter* skip_page_header_num = nullptr; + RuntimeProfile::Counter* parse_page_header_num = nullptr; + + RuntimeProfile::Counter* predicate_filter_time = nullptr; + RuntimeProfile::Counter* dict_filter_rewrite_time = nullptr; + RuntimeProfile::Counter* convert_time = nullptr; + RuntimeProfile::Counter* bloom_filter_read_time = nullptr; +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_reader.cpp b/be/src/format_v2/parquet/parquet_reader.cpp new file mode 100644 index 00000000000000..24797200693020 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_reader.cpp @@ -0,0 +1,674 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_reader.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_factory.hpp" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" +#include "format_v2/column_mapper.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/parquet_file_context.h" +#include "format_v2/parquet/parquet_scan.h" +#include "format_v2/parquet/parquet_statistics.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "runtime/runtime_state.h" + +namespace doris::format::parquet { + +struct ParquetReaderScanState { + ParquetFileContext file_context; + std::vector> file_schema; + RowGroupScanPlan scan_plan; + ParquetScanScheduler scheduler; + const cctz::time_zone* timezone = nullptr; + bool enable_bloom_filter = false; + bool enable_page_cache = false; + bool enable_strict_mode = false; +}; + +int64_t column_chunk_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) { + return column_metadata.has_dictionary_page() + ? cast_set(column_metadata.dictionary_page_offset()) + : cast_set(column_metadata.data_page_offset()); +} + +void collect_all_leaf_column_ids(const ParquetColumnSchema& column_schema, + std::unordered_set* leaf_column_ids) { + DORIS_CHECK(leaf_column_ids != nullptr); + if (column_schema.kind == ParquetColumnSchemaKind::PRIMITIVE) { + if (column_schema.leaf_column_id >= 0) { + leaf_column_ids->insert(column_schema.leaf_column_id); + } + return; + } + for (const auto& child : column_schema.children) { + DORIS_CHECK(child != nullptr); + collect_all_leaf_column_ids(*child, leaf_column_ids); + } +} + +void collect_projected_leaf_column_ids(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex& projection, + std::unordered_set* leaf_column_ids) { + DORIS_CHECK(leaf_column_ids != nullptr); + if (projection.project_all_children || projection.children.empty()) { + collect_all_leaf_column_ids(column_schema, leaf_column_ids); + return; + } + for (const auto& child_projection : projection.children) { + const auto child_it = + std::ranges::find_if(column_schema.children, [&](const auto& child_schema) { + return child_schema->local_id == child_projection.local_id(); + }); + DORIS_CHECK(child_it != column_schema.children.end()); + collect_projected_leaf_column_ids(**child_it, child_projection, leaf_column_ids); + } +} + +void collect_request_leaf_column_ids( + const std::vector>& file_schema, + const format::FileScanRequest& request, std::unordered_set* leaf_column_ids) { + DORIS_CHECK(leaf_column_ids != nullptr); + auto collect_scan_column = [&](const format::LocalColumnIndex& projection) { + const auto local_id = projection.local_id(); + if (local_id == format::ROW_POSITION_COLUMN_ID || + local_id == format::GLOBAL_ROWID_COLUMN_ID) { + return; + } + DORIS_CHECK(local_id >= 0 && local_id < static_cast(file_schema.size())); + DORIS_CHECK(file_schema[local_id] != nullptr); + collect_projected_leaf_column_ids(*file_schema[local_id], projection, leaf_column_ids); + }; + for (const auto& column : request.predicate_columns) { + collect_scan_column(column); + } + for (const auto& column : request.non_predicate_columns) { + collect_scan_column(column); + } +} + +std::vector build_page_cache_ranges( + const ::parquet::FileMetaData& metadata, + const std::vector>& file_schema, + const format::FileScanRequest& request, const RowGroupScanPlan& row_group_plan) { + std::unordered_set leaf_column_ids; + collect_request_leaf_column_ids(file_schema, request, &leaf_column_ids); + std::vector ranges; + ranges.reserve(row_group_plan.row_groups.size() * leaf_column_ids.size()); + for (const auto& row_group_plan_item : row_group_plan.row_groups) { + auto row_group_metadata = metadata.RowGroup(row_group_plan_item.row_group_id); + DORIS_CHECK(row_group_metadata != nullptr); + for (const auto leaf_column_id : leaf_column_ids) { + DORIS_CHECK(leaf_column_id >= 0 && leaf_column_id < row_group_metadata->num_columns()); + auto column_metadata = row_group_metadata->ColumnChunk(leaf_column_id); + DORIS_CHECK(column_metadata != nullptr); + const int64_t offset = column_chunk_start_offset(*column_metadata); + const int64_t size = column_metadata->total_compressed_size(); + DORIS_CHECK(offset >= 0); + DORIS_CHECK(size >= 0); + if (size > 0) { + ranges.push_back(ParquetPageCacheRange {.offset = offset, .size = size}); + } + } + } + return ranges; +} + +const ParquetColumnSchema& projected_root_schema( + const std::vector>& file_schema, + const format::LocalColumnIndex& projection) { + const auto local_id = projection.local_id(); + DORIS_CHECK(local_id >= 0 && local_id < static_cast(file_schema.size())); + DORIS_CHECK(file_schema[local_id] != nullptr); + return *file_schema[local_id]; +} + +int64_t count_loaded_non_null_values(const ParquetColumnSchema& root_schema, + const ParquetColumnReader& shape_reader, + int64_t expected_rows) { + const auto& def_levels = shape_reader.nested_definition_levels(); + const auto& rep_levels = shape_reader.nested_repetition_levels(); + const int64_t levels_written = shape_reader.nested_levels_written(); + DORIS_CHECK(levels_written >= expected_rows); + if (root_schema.max_repetition_level == 0) { + DORIS_CHECK(levels_written == expected_rows); + const int16_t non_null_definition_level = root_schema.nullable_definition_level; + int64_t count = 0; + for (int64_t level_idx = 0; level_idx < levels_written; ++level_idx) { + count += def_levels[level_idx] >= non_null_definition_level ? 1 : 0; + } + return count; + } + + // For repeated encodings, one top-level row starts when the leaf repetition level moves above + // no higher than the top-level container's repeated boundary. Empty MAP/LIST rows have no + // entries but still carry a level slot; they are non-NULL and must be counted by count(col). + const int16_t non_null_definition_level = + static_cast(root_schema.definition_level - 1); + int64_t counted_rows = 0; + int64_t non_null_rows = 0; + for (int64_t level_idx = 0; level_idx < levels_written && counted_rows < expected_rows; + ++level_idx) { + if (rep_levels[level_idx] >= root_schema.repetition_level) { + continue; + } + ++counted_rows; + non_null_rows += def_levels[level_idx] >= non_null_definition_level ? 1 : 0; + } + DORIS_CHECK(counted_rows == expected_rows); + return non_null_rows; +} + +DataTypePtr nullable_like_original(const DataTypePtr& type, DataTypePtr nested_type) { + return type != nullptr && type->is_nullable() ? make_nullable(nested_type) : nested_type; +} + +int timestamp_tz_scale(const ParquetTypeDescriptor& type_descriptor) { + switch (type_descriptor.time_unit) { + case ParquetTimeUnit::MILLIS: + return 3; + case ParquetTimeUnit::MICROS: + case ParquetTimeUnit::UNKNOWN: + default: + return 6; + } +} + +bool should_map_to_timestamp_tz(const ParquetColumnSchema& column_schema) { + const auto& type_descriptor = column_schema.type_descriptor; + return type_descriptor.physical_type == ::parquet::Type::INT96 || + (type_descriptor.is_timestamp && type_descriptor.timestamp_is_adjusted_to_utc); +} + +DataTypePtr apply_timestamp_tz_mapping(ParquetColumnSchema* column_schema) { + DORIS_CHECK(column_schema != nullptr); + if (column_schema->kind == ParquetColumnSchemaKind::PRIMITIVE) { + if (should_map_to_timestamp_tz(*column_schema)) { + const bool nullable = + column_schema->type != nullptr && column_schema->type->is_nullable(); + const auto scale = timestamp_tz_scale(column_schema->type_descriptor); + column_schema->type = DataTypeFactory::instance().create_data_type(TYPE_TIMESTAMPTZ, + nullable, 0, scale); + column_schema->type_descriptor.doris_type = column_schema->type; + } + return column_schema->type; + } + + std::vector child_types; + child_types.reserve(column_schema->children.size()); + for (auto& child : column_schema->children) { + child_types.push_back(apply_timestamp_tz_mapping(child.get())); + } + + if (column_schema->kind == ParquetColumnSchemaKind::LIST) { + DORIS_CHECK(child_types.size() == 1); + column_schema->type = nullable_like_original( + column_schema->type, std::make_shared(child_types[0])); + } else if (column_schema->kind == ParquetColumnSchemaKind::MAP) { + DORIS_CHECK(child_types.size() == 2); + column_schema->type = nullable_like_original( + column_schema->type, std::make_shared(make_nullable(child_types[0]), + make_nullable(child_types[1]))); + } else if (column_schema->kind == ParquetColumnSchemaKind::STRUCT) { + Strings child_names; + child_names.reserve(column_schema->children.size()); + for (const auto& child : column_schema->children) { + child_names.push_back(child->name); + } + column_schema->type = nullable_like_original( + column_schema->type, std::make_shared(child_types, child_names)); + } + return column_schema->type; +} + +static Status find_projected_minmax_leaf(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex& projection, + const ParquetColumnSchema** leaf_schema) { + DORIS_CHECK(leaf_schema != nullptr); + if (projection.project_all_children || projection.children.empty()) { + if (column_schema.leaf_column_id < 0) { + return Status::NotSupported( + "Parquet aggregate pushdown only supports primitive column {}", + column_schema.name); + } + if (column_schema.max_repetition_level > 0) { + return Status::NotSupported( + "Parquet aggregate pushdown does not support repeated column {}", + column_schema.name); + } + *leaf_schema = &column_schema; + return Status::OK(); + } + if (projection.children.size() != 1) { + return Status::NotSupported( + "Parquet aggregate pushdown only supports a single nested leaf under column {}", + column_schema.name); + } + const auto& child_projection = projection.children[0]; + const auto child_schema_it = + std::ranges::find_if(column_schema.children, [&](const auto& child_schema) { + return child_schema->local_id == child_projection.local_id(); + }); + if (child_schema_it != column_schema.children.end()) { + return find_projected_minmax_leaf(**child_schema_it, child_projection, leaf_schema); + } + return Status::InvalidArgument("Invalid parquet aggregate projection local id {} for column {}", + child_projection.local_id(), column_schema.name); +} + +void ParquetReader::_fill_column_definition(const ParquetColumnSchema& column_schema, + format::ColumnDefinition* field) const { + if (column_schema.parquet_field_id >= 0) { + field->identifier = Field::create_field(column_schema.parquet_field_id); + } else { + field->identifier = Field::create_field(column_schema.name); + } + field->local_id = column_schema.local_id; + field->name = column_schema.name; + field->type = column_schema.type != nullptr && !column_schema.type->is_nullable() + ? make_nullable(column_schema.type) + : column_schema.type; + field->children.clear(); + field->children.reserve(column_schema.children.size()); + for (const auto& child : column_schema.children) { + format::ColumnDefinition child_field; + _fill_column_definition(*child, &child_field); + field->children.push_back(std::move(child_field)); + } +} + +ParquetReader::ParquetReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + std::optional global_rowid_context, + bool enable_mapping_timestamp_tz) + : FileReader(system_properties, file_description, io_ctx, profile), + _global_rowid_context(global_rowid_context), + _enable_mapping_timestamp_tz(enable_mapping_timestamp_tz) {} + +ParquetReader::~ParquetReader() = default; + +Status ParquetReader::init(RuntimeState* state) { + RETURN_IF_ERROR(format::FileReader::init(state)); + if (_profile != nullptr) { + COUNTER_UPDATE(_parquet_profile.file_reader_create_time, + _reader_statistics.file_reader_create_time); + COUNTER_UPDATE(_parquet_profile.open_file_num, _reader_statistics.open_file_num); + } + _state = std::make_unique(); + _state->enable_bloom_filter = + state != nullptr && state->query_options().enable_parquet_filter_by_bloom_filter; + _state->enable_page_cache = + state != nullptr && state->query_options().enable_parquet_file_page_cache; + if (state != nullptr) { + _state->timezone = &state->timezone_obj(); + _state->enable_strict_mode = state->enable_strict_mode(); + _state->scheduler.set_timezone(&state->timezone_obj()); + _state->scheduler.set_enable_strict_mode(_state->enable_strict_mode); + } + _state->scheduler.set_batch_size(_batch_size); + // Open parquet file and parse metadata to get file schema. + RETURN_IF_ERROR(_state->file_context.open(_tracing_file_reader, _io_ctx.get(), + _state->enable_page_cache, *_file_description)); + // Build file schema from parquet metadata. + // A file reader may expose raw file identifiers, such as Parquet field_id, through ColumnDefinition::identifier + RETURN_IF_ERROR( + build_parquet_column_schema(*_state->file_context.schema, &_state->file_schema)); + if (_enable_mapping_timestamp_tz) { + for (auto& column_schema : _state->file_schema) { + apply_timestamp_tz_mapping(column_schema.get()); + } + } + return Status::OK(); +} + +void ParquetReader::set_batch_size(size_t batch_size) { + _batch_size = std::max(1, batch_size); + if (_state != nullptr) { + _state->scheduler.set_batch_size(_batch_size); + } +} + +Status ParquetReader::get_schema(std::vector* file_schema) const { + if (file_schema == nullptr) { + return Status::InvalidArgument("file_schema is null"); + } + file_schema->clear(); + if (_state == nullptr || _state->file_context.schema == nullptr) { + return Status::Uninitialized("ParquetReader is not open"); + } + + file_schema->reserve(_state->file_schema.size()); + for (size_t column_idx = 0; column_idx < _state->file_schema.size(); ++column_idx) { + format::ColumnDefinition field; + _fill_column_definition(*_state->file_schema[column_idx], &field); + DORIS_CHECK(field.local_id == static_cast(column_idx)); + file_schema->push_back(std::move(field)); + } + if (_global_rowid_context.has_value()) { + file_schema->push_back(format::global_rowid_column_definition()); + } + return Status::OK(); +} + +std::unique_ptr ParquetReader::create_column_mapper( + format::TableColumnMapperOptions options) const { + return std::make_unique(std::move(options)); +} + +Status ParquetReader::open(std::shared_ptr request) { + if (_state == nullptr || _state->file_context.metadata == nullptr || + _state->file_context.schema == nullptr) { + return Status::Uninitialized("ParquetReader is not open"); + } + auto request_snapshot = request; + DORIS_CHECK(request_snapshot != nullptr); + RETURN_IF_ERROR(format::FileReader::open(std::move(request))); + + const int num_fields = static_cast(_state->file_schema.size()); + for (const auto& column_filter : request_snapshot->column_predicate_filters) { + const auto file_column_id = column_filter.effective_file_column_id(); + if (!file_column_id.is_valid() || file_column_id.value() >= num_fields) { + return Status::InvalidArgument("Invalid parquet filter top-level local id {}", + file_column_id.value()); + } + } + + // `local_positions.empty()` means all columns are needed by table reader + // TODO(gabriel): It will happen only for TVF `select *` query. + if (request_snapshot->local_positions.empty()) { + for (const auto& col : request_snapshot->predicate_columns) { + request_snapshot->local_positions.emplace(col.column_id(), + format::LocalIndex(col.column_id().value())); + } + for (const auto& col : request_snapshot->non_predicate_columns) { + request_snapshot->local_positions.emplace(col.column_id(), + format::LocalIndex(col.column_id().value())); + } + } + + for (const auto& col : request_snapshot->predicate_columns) { + DORIS_CHECK(request_snapshot->local_positions.count(col.column_id()) > 0); + const auto local_id = col.local_id(); + if (local_id == format::ROW_POSITION_COLUMN_ID || + local_id == format::GLOBAL_ROWID_COLUMN_ID) { + continue; + } + DORIS_CHECK(local_id >= 0 && local_id < num_fields); + } + for (const auto& col : request_snapshot->non_predicate_columns) { + DORIS_CHECK(request_snapshot->local_positions.count(col.column_id()) > 0); + const auto local_id = col.local_id(); + if (local_id == format::ROW_POSITION_COLUMN_ID || + local_id == format::GLOBAL_ROWID_COLUMN_ID) { + continue; + } + DORIS_CHECK(local_id >= 0 && local_id < num_fields); + } + + RowGroupScanPlan row_group_plan; + ParquetScanRange scan_range; + scan_range.start_offset = _file_description->range_start_offset; + scan_range.size = _file_description->range_size; + scan_range.file_size = _file_description->file_size; + // Get selected ranges in row groups according to metadata (Row-Group level index and Page Index including Zonemap, Dictionary, Bloom Filter). + RETURN_IF_ERROR(plan_parquet_row_groups( + *_state->file_context.metadata, _state->file_context.file_reader.get(), + _state->file_schema, *request_snapshot, scan_range, _state->enable_bloom_filter, + &row_group_plan, _state->timezone)); + if (_profile != nullptr) { + _parquet_profile.update_pruning_stats(row_group_plan.pruning_stats); + } + if (_state->enable_page_cache) { + _state->file_context.register_page_cache_ranges( + build_page_cache_ranges(*_state->file_context.metadata, _state->file_schema, + *request_snapshot, row_group_plan)); + } + _state->scan_plan = row_group_plan; + _state->scheduler.set_page_skip_profile(_parquet_profile.page_skip_profile()); + _state->scheduler.set_global_rowid_context(_global_rowid_context); + _state->scheduler.set_scan_profile(_parquet_profile.scan_profile()); + _state->scheduler.set_plan(std::move(row_group_plan)); + _eof = _state->scheduler.empty(); + return Status::OK(); +} + +Status ParquetReader::get_block(Block* file_block, size_t* rows, bool* eof) { + if (_state == nullptr || _state->file_context.file_reader == nullptr || + _state->file_context.schema == nullptr) { + return Status::Uninitialized("ParquetReader is not open"); + } + *rows = 0; + if (_eof) { + *eof = true; + return Status::OK(); + } + auto request_snapshot = _request; + if (request_snapshot == nullptr) { + return Status::Cancelled("ParquetReader is closed"); + } + + const auto predicate_filtered_rows_before = _state->scheduler.predicate_filtered_rows(); + RETURN_IF_ERROR(_state->scheduler.read_next_batch(_state->file_context, _state->file_schema, + *request_snapshot, file_block, rows, eof)); + _sync_page_cache_profile(); + if (_io_ctx != nullptr) { + _io_ctx->predicate_filtered_rows += + _state->scheduler.predicate_filtered_rows() - predicate_filtered_rows_before; + } + _eof = *eof; + return Status::OK(); +} + +void ParquetReader::_sync_page_cache_profile() { + if (_profile == nullptr || _state == nullptr) { + return; + } + const auto stats = _state->file_context.page_cache_stats(); + COUNTER_UPDATE(_parquet_profile.page_read_counter, + stats.read_count - _reported_page_cache_stats.read_count); + COUNTER_UPDATE(_parquet_profile.page_cache_write_counter, + stats.write_count - _reported_page_cache_stats.write_count); + COUNTER_UPDATE( + _parquet_profile.page_cache_compressed_write_counter, + stats.compressed_write_count - _reported_page_cache_stats.compressed_write_count); + COUNTER_UPDATE(_parquet_profile.page_cache_hit_counter, + stats.hit_count - _reported_page_cache_stats.hit_count); + COUNTER_UPDATE(_parquet_profile.page_cache_missing_counter, + stats.miss_count - _reported_page_cache_stats.miss_count); + COUNTER_UPDATE(_parquet_profile.page_cache_compressed_hit_counter, + stats.compressed_hit_count - _reported_page_cache_stats.compressed_hit_count); + _reported_page_cache_stats = stats; +} + +void ParquetReader::set_condition_cache_context(std::shared_ptr ctx) { + if (_state == nullptr) { + return; + } + _state->scheduler.set_condition_cache_context(std::move(ctx)); + if (_io_ctx != nullptr) { + // Condition-cache HIT filters row ranges before batch reading, so skipped rows never belong + // to a later get_block() batch. Report the plan-level skipped rows at the same point where + // the scan plan is rewritten. + _io_ctx->condition_cache_filtered_rows += _state->scheduler.condition_cache_filtered_rows(); + } +} + +int64_t ParquetReader::get_total_rows() const { + if (_state == nullptr) { + return 0; + } + int64_t rows = 0; + for (const auto& row_group_plan : _state->scan_plan.row_groups) { + rows += row_group_plan.row_group_rows; + } + return rows; +} + +Status ParquetReader::get_aggregate_result(const format::FileAggregateRequest& request, + format::FileAggregateResult* result) { + DORIS_CHECK(result != nullptr); + if (_state == nullptr || _state->file_context.metadata == nullptr || + _state->file_context.schema == nullptr) { + return Status::Uninitialized("ParquetReader is not open"); + } + result->count = 0; + result->columns.clear(); + if (request.agg_type != TPushAggOp::type::COUNT && + request.agg_type != TPushAggOp::type::MINMAX) { + return Status::NotSupported("Unsupported parquet aggregate pushdown type {}", + request.agg_type); + } + + // Aggregate row count in all selected row groups. For MIN/MAX aggregate, this is used to determine whether there is no row group selected. + for (const auto& row_group_plan : _state->scan_plan.row_groups) { + auto row_group_metadata = + _state->file_context.metadata->RowGroup(row_group_plan.row_group_id); + DORIS_CHECK(row_group_metadata != nullptr); + result->count += row_group_metadata->num_rows(); + } + if (request.agg_type == TPushAggOp::type::COUNT) { + if (request.columns.empty()) { + return Status::OK(); + } + if (request.columns.size() != 1) { + return Status::NotSupported("Parquet COUNT pushdown only supports one count column"); + } + const auto& count_projection = request.columns[0].projection; + const auto& root_schema = projected_root_schema(_state->file_schema, count_projection); + result->count = 0; + for (const auto& row_group_plan : _state->scan_plan.row_groups) { + std::shared_ptr<::parquet::RowGroupReader> row_group; + try { + row_group = _state->file_context.file_reader->RowGroup(row_group_plan.row_group_id); + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to open parquet row group {}: {}", + row_group_plan.row_group_id, e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to open parquet row group {}: {}", + row_group_plan.row_group_id, e.what()); + } + + ParquetColumnReaderFactory column_reader_factory( + row_group, _state->file_context.schema->num_columns(), + &row_group_plan.page_skip_plans, _parquet_profile.page_skip_profile(), + _state->timezone, _state->enable_strict_mode, + _parquet_profile.scan_profile().column_reader_profile); + std::unique_ptr shape_reader; + RETURN_IF_ERROR(column_reader_factory.create_count_shape_reader( + root_schema, &count_projection, &shape_reader)); + DORIS_CHECK(shape_reader != nullptr); + + int64_t row_group_cursor = 0; + for (const auto& selected_range : row_group_plan.selected_ranges) { + DORIS_CHECK(selected_range.start >= row_group_cursor); + RETURN_IF_ERROR(shape_reader->skip(selected_range.start - row_group_cursor)); + row_group_cursor = selected_range.start; + + int64_t range_rows_read = 0; + while (range_rows_read < selected_range.length) { + const int64_t batch_rows = + std::min(_batch_size, selected_range.length - range_rows_read); + // COUNT(col) only needs the top-level NULL state. The shape reader loads + // def/rep levels from one representative leaf and does not build value_indices + // or values_column. MAP chooses the key leaf; ARRAY/STRUCT may choose a string + // leaf, but the levels-only protocol still avoids Doris-side string + // materialization for that leaf. + RETURN_IF_ERROR(shape_reader->load_nested_levels_batch(batch_rows)); + result->count += + count_loaded_non_null_values(root_schema, *shape_reader, batch_rows); + range_rows_read += batch_rows; + row_group_cursor += batch_rows; + } + } + } + return Status::OK(); + } + + result->columns.resize(request.columns.size()); + for (size_t request_column_idx = 0; request_column_idx < request.columns.size(); + ++request_column_idx) { + const auto file_column_id = request.columns[request_column_idx].projection.local_id(); + if (file_column_id < 0 || + file_column_id >= static_cast(_state->file_schema.size())) { + return Status::InvalidArgument("Invalid parquet aggregate column id {}", + file_column_id); + } + const auto& column_schema = _state->file_schema[file_column_id]; + DORIS_CHECK(column_schema != nullptr); + const ParquetColumnSchema* leaf_schema = nullptr; + RETURN_IF_ERROR(find_projected_minmax_leaf( + *column_schema, request.columns[request_column_idx].projection, &leaf_schema)); + DORIS_CHECK(leaf_schema != nullptr); + + auto& aggregate_column = result->columns[request_column_idx]; + aggregate_column.projection = request.columns[request_column_idx].projection; + for (const auto& row_group_plan : _state->scan_plan.row_groups) { + auto row_group_metadata = + _state->file_context.metadata->RowGroup(row_group_plan.row_group_id); + DORIS_CHECK(row_group_metadata != nullptr); + auto column_chunk = row_group_metadata->ColumnChunk(leaf_schema->leaf_column_id); + DORIS_CHECK(column_chunk != nullptr); + const auto statistics = ParquetStatisticsUtils::TransformColumnStatistics( + *leaf_schema, column_chunk->statistics(), _state->timezone); + if (!statistics.has_min_max) { + return Status::NotSupported("Missing parquet min/max statistics for column {}", + leaf_schema->name); + } + if (!aggregate_column.has_min || statistics.min_value < aggregate_column.min_value) { + aggregate_column.min_value = statistics.min_value; + aggregate_column.has_min = true; + } + if (!aggregate_column.has_max || aggregate_column.max_value < statistics.max_value) { + aggregate_column.max_value = statistics.max_value; + aggregate_column.has_max = true; + } + } + if (!aggregate_column.has_min || !aggregate_column.has_max) { + return Status::NotSupported("No parquet row group selected for min/max pushdown"); + } + } + return Status::OK(); +} + +Status ParquetReader::close() { + if (_state != nullptr) { + _sync_page_cache_profile(); + RETURN_IF_ERROR(_state->file_context.close()); + } + return FileReader::close(); +} + +void ParquetReader::_init_profile() { + _parquet_profile.init(_profile); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_reader.h b/be/src/format_v2/parquet/parquet_reader.h new file mode 100644 index 00000000000000..ff74b97a26e0e7 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_reader.h @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "common/status.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/parquet_file_context.h" +#include "format_v2/parquet/parquet_profile.h" +#include "format_v2/parquet/parquet_scan.h" + +namespace doris { +namespace io { +struct IOContext; +} // namespace io +} // namespace doris + +namespace doris::format::parquet { + +struct ParquetReaderScanState; + +// ============================================================================ +// ============================================================================ +// init() -> get_schema() -> open(request) -> get_block() [loop] -> close() +// ============================================================================ +class ParquetReader : public format::FileReader { +public: + ParquetReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + std::optional global_rowid_context = std::nullopt, + bool enable_mapping_timestamp_tz = false); + ~ParquetReader() override; + + Status init(RuntimeState* state) override; + + void set_batch_size(size_t batch_size) override; + + Status get_schema(std::vector* file_schema) const override; + + std::unique_ptr create_column_mapper( + format::TableColumnMapperOptions options) const override; + + Status open(std::shared_ptr request) override; + + Status get_block(Block* file_block, size_t* rows, bool* eof) override; + + Status get_aggregate_result(const format::FileAggregateRequest& request, + format::FileAggregateResult* result) override; + + void set_condition_cache_context(std::shared_ptr ctx) override; + + int64_t get_total_rows() const override; + + Status close() override; + +protected: + void _init_profile() override; + +private: + void _sync_page_cache_profile(); + + void _fill_column_definition(const ParquetColumnSchema& column_schema, + format::ColumnDefinition* field) const; + + std::unique_ptr + _state; // complete scan state (file_context + schema + scheduler) + ParquetProfile _parquet_profile; // RuntimeProfile counter set + ParquetPageCacheStats _reported_page_cache_stats; + std::optional _global_rowid_context; // global RowId context + size_t _batch_size = ParquetScanScheduler::DEFAULT_READ_BATCH_SIZE; + bool _enable_mapping_timestamp_tz = false; // whether UTC timestamps are mapped to TIMESTAMPTZ +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_scan.cpp b/be/src/format_v2/parquet/parquet_scan.cpp new file mode 100644 index 00000000000000..d636f3e3f9ee41 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_scan.cpp @@ -0,0 +1,648 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_scan.h" + +#include +#include +#include +#include + +#include "common/exception.h" +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_vector.h" +#include "exprs/vexpr_context.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/parquet_file_context.h" +#include "format_v2/parquet/parquet_statistics.h" + +namespace doris::format::parquet { + +namespace { + +int64_t column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) { + return column_metadata.has_dictionary_page() + ? cast_set(column_metadata.dictionary_page_offset()) + : cast_set(column_metadata.data_page_offset()); +} + +bool is_row_group_outside_range(const ::parquet::FileMetaData& metadata, + const ParquetScanRange& scan_range, int row_group_idx) { + if (scan_range.size < 0) { + return false; + } + const int64_t range_start_offset = scan_range.start_offset; + const int64_t range_end_offset = range_start_offset + scan_range.size; + DORIS_CHECK(range_start_offset >= 0); + DORIS_CHECK(range_end_offset >= range_start_offset); + if (range_start_offset == 0 && + (scan_range.file_size < 0 || range_end_offset >= scan_range.file_size)) { + return false; + } + + auto row_group_metadata = metadata.RowGroup(row_group_idx); + DORIS_CHECK(row_group_metadata != nullptr); + DORIS_CHECK(row_group_metadata->num_columns() > 0); + const auto first_column = row_group_metadata->ColumnChunk(0); + const auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1); + DORIS_CHECK(first_column != nullptr); + DORIS_CHECK(last_column != nullptr); + const int64_t row_group_start_offset = column_start_offset(*first_column); + const int64_t row_group_end_offset = + column_start_offset(*last_column) + last_column->total_compressed_size(); + const int64_t row_group_mid_offset = + row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2; + return row_group_mid_offset < range_start_offset || row_group_mid_offset >= range_end_offset; +} + +} // namespace + +Status plan_parquet_row_groups(const ::parquet::FileMetaData& metadata, + ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, + const ParquetScanRange& scan_range, bool enable_bloom_filter, + RowGroupScanPlan* plan, const cctz::time_zone* timezone) { + DORIS_CHECK(plan != nullptr); + plan->row_groups.clear(); + plan->pruning_stats = ParquetPruningStats {}; + + std::vector row_group_first_rows(metadata.num_row_groups()); + std::vector scan_range_selected_row_groups; + scan_range_selected_row_groups.reserve(metadata.num_row_groups()); + int64_t next_row_group_first_row = 0; + for (int row_group_idx = 0; row_group_idx < metadata.num_row_groups(); ++row_group_idx) { + row_group_first_rows[row_group_idx] = next_row_group_first_row; + auto row_group_metadata = metadata.RowGroup(row_group_idx); + DORIS_CHECK(row_group_metadata != nullptr); + const int64_t row_group_rows = row_group_metadata->num_rows(); + if (row_group_rows < 0) { + return Status::Corruption("Invalid negative row count in parquet row group {}", + row_group_idx); + } + next_row_group_first_row += row_group_rows; + if (!is_row_group_outside_range(metadata, scan_range, row_group_idx)) { + scan_range_selected_row_groups.push_back(row_group_idx); + } + } + + std::vector statistics_selected_row_groups; + RETURN_IF_ERROR(select_row_groups_by_statistics( + metadata, file_reader, file_schema, request, &scan_range_selected_row_groups, + &statistics_selected_row_groups, enable_bloom_filter, &plan->pruning_stats, timezone)); + + plan->row_groups.reserve(statistics_selected_row_groups.size()); + for (const auto row_group_idx : statistics_selected_row_groups) { + auto row_group_metadata = metadata.RowGroup(row_group_idx); + DORIS_CHECK(row_group_metadata != nullptr); + const int64_t row_group_rows = row_group_metadata->num_rows(); + if (row_group_rows == 0) { + continue; + } + + RowGroupReadPlan row_group_plan; + row_group_plan.row_group_id = row_group_idx; + row_group_plan.first_file_row = row_group_first_rows[row_group_idx]; + row_group_plan.row_group_rows = row_group_rows; + RETURN_IF_ERROR(select_row_group_ranges_by_page_index( + file_reader, file_schema, request, row_group_idx, row_group_rows, + &row_group_plan.selected_ranges, &row_group_plan.page_skip_plans, + &plan->pruning_stats, timezone)); + if (row_group_plan.selected_ranges.empty()) { + continue; + } + plan->pruning_stats.selected_row_ranges += row_group_plan.selected_ranges.size(); + plan->row_groups.push_back(std::move(row_group_plan)); + } + plan->pruning_stats.selected_row_groups = plan->row_groups.size(); + return Status::OK(); +} + +namespace { + +uint16_t apply_filter_to_selection(const IColumn::Filter& filter, SelectionVector* selection, + uint16_t selected_rows) { + uint16_t new_selected_rows = 0; + for (uint16_t selection_idx = 0; selection_idx < selected_rows; ++selection_idx) { + const auto row_idx = selection->get_index(selection_idx); + if (filter[row_idx] != 0) { + selection->set_index(new_selected_rows++, static_cast(row_idx)); + } + } + return new_selected_rows; +} + +Status execute_filter_conjuncts(const format::FileScanRequest& request, int64_t batch_rows, + Block* file_block, SelectionVector* selection, + uint16_t* selected_rows) { + for (const auto& conjunct : request.conjuncts) { + if (*selected_rows == 0) { + break; + } + DORIS_CHECK(conjunct != nullptr); + IColumn::Filter filter(static_cast(batch_rows), 1); + bool can_filter_all = false; + RETURN_IF_ERROR(conjunct->execute_filter(file_block, filter.data(), + static_cast(batch_rows), false, + &can_filter_all)); + *selected_rows = + can_filter_all ? 0 : apply_filter_to_selection(filter, selection, *selected_rows); + } + return Status::OK(); +} + +Status execute_delete_conjuncts(const format::FileScanRequest& request, int64_t batch_rows, + Block* file_block, SelectionVector* selection, + uint16_t* selected_rows) { + for (const auto& delete_conjunct : request.delete_conjuncts) { + if (*selected_rows == 0) { + break; + } + DORIS_CHECK(delete_conjunct != nullptr); + int result_column_id = -1; + RETURN_IF_ERROR(delete_conjunct->root()->execute(delete_conjunct.get(), file_block, + &result_column_id)); + DORIS_CHECK(result_column_id >= 0 && + result_column_id < static_cast(file_block->columns())); + const auto& delete_filter = assert_cast( + *file_block->get_by_position(result_column_id).column) + .get_data(); + DORIS_CHECK(delete_filter.size() == static_cast(batch_rows)); + IColumn::Filter keep_filter(static_cast(batch_rows), 1); + bool has_kept_row = false; + for (size_t row = 0; row < static_cast(batch_rows); ++row) { + keep_filter[row] = !delete_filter[row]; + has_kept_row |= keep_filter[row] != 0; + } + file_block->erase(result_column_id); + *selected_rows = + !has_kept_row ? 0 + : apply_filter_to_selection(keep_filter, selection, *selected_rows); + } + return Status::OK(); +} + +} // namespace + +IColumn::Filter selection_to_filter(const SelectionVector& selection, uint16_t selected_rows, + int64_t batch_rows) { + IColumn::Filter filter(static_cast(batch_rows), 0); + for (uint16_t selection_idx = 0; selection_idx < selected_rows; ++selection_idx) { + filter[selection.get_index(selection_idx)] = 1; + } + return filter; +} + +Status execute_batch_filters(const format::FileScanRequest& request, int64_t batch_rows, + Block* file_block, SelectionVector* selection, uint16_t* selected_rows, + int64_t* conjunct_filtered_rows) { + if (request.conjuncts.empty() && request.delete_conjuncts.empty()) { + return Status::OK(); + } + const auto selected_rows_before_conjunct = *selected_rows; + RETURN_IF_ERROR( + execute_filter_conjuncts(request, batch_rows, file_block, selection, selected_rows)); + if (conjunct_filtered_rows != nullptr) { + *conjunct_filtered_rows += static_cast(selected_rows_before_conjunct) - + static_cast(*selected_rows); + } + if (*selected_rows == 0) { + return Status::OK(); + } + return execute_delete_conjuncts(request, batch_rows, file_block, selection, selected_rows); +} + +namespace { +int64_t count_range_rows(const std::vector& ranges) { + int64_t rows = 0; + for (const auto& range : ranges) { + rows += range.length; + } + return rows; +} + +void append_intersection(const RowRange& left, const RowRange& right, + std::vector* result) { + const int64_t start = std::max(left.start, right.start); + const int64_t end = std::min(left.start + left.length, right.start + right.length); + if (start < end) { + result->push_back(RowRange {.start = start, .length = end - start}); + } +} + +std::vector filter_ranges_by_condition_cache(const std::vector& ranges, + const std::vector& cache, + int64_t row_group_first_row, + int64_t base_granule) { + std::vector result; + if (cache.empty()) { + return ranges; + } + + // Cache coordinates are file-global granules; RowRange coordinates are row-group-relative. + // Walk every selected range in order and split it by granule. Granules covered by the bitmap + // are kept only when the bit is true. Granules outside the bitmap are kept conservatively, so + // an undersized or old-format cache entry cannot skip valid rows. + for (const auto& range : ranges) { + const int64_t global_start = row_group_first_row + range.start; + const int64_t global_end = global_start + range.length; + for (int64_t granule = global_start / ConditionCacheContext::GRANULE_SIZE; + granule <= (global_end - 1) / ConditionCacheContext::GRANULE_SIZE; ++granule) { + const int64_t cache_idx = granule - base_granule; + const bool keep = cache_idx < 0 || static_cast(cache_idx) >= cache.size() || + cache[static_cast(cache_idx)]; + if (!keep) { + continue; + } + const int64_t granule_start = granule * ConditionCacheContext::GRANULE_SIZE; + const int64_t granule_end = granule_start + ConditionCacheContext::GRANULE_SIZE; + const RowRange file_granule_range {.start = granule_start - row_group_first_row, + .length = granule_end - granule_start}; + append_intersection(range, file_granule_range, &result); + } + } + return result; +} + +} // namespace + +void ParquetScanScheduler::set_plan(RowGroupScanPlan plan) { + _row_group_plans = std::move(plan.row_groups); + _condition_cache_filtered_rows = 0; + _predicate_filtered_rows = 0; + reset(); +} + +void ParquetScanScheduler::set_condition_cache_context(std::shared_ptr ctx) { + _condition_cache_ctx = std::move(ctx); + if (!_condition_cache_ctx || !_condition_cache_ctx->filter_result || _row_group_plans.empty()) { + return; + } + + _condition_cache_ctx->base_granule = + _row_group_plans.front().first_file_row / ConditionCacheContext::GRANULE_SIZE; + if (!_condition_cache_ctx->is_hit) { + return; + } + + std::vector filtered_plans; + filtered_plans.reserve(_row_group_plans.size()); + for (auto& plan : _row_group_plans) { + const int64_t old_rows = count_range_rows(plan.selected_ranges); + plan.selected_ranges = filter_ranges_by_condition_cache( + plan.selected_ranges, *_condition_cache_ctx->filter_result, plan.first_file_row, + _condition_cache_ctx->base_granule); + const int64_t new_rows = count_range_rows(plan.selected_ranges); + _condition_cache_filtered_rows += old_rows - new_rows; + if (!plan.selected_ranges.empty()) { + filtered_plans.push_back(std::move(plan)); + } + } + _row_group_plans = std::move(filtered_plans); + reset(); +} + +void ParquetScanScheduler::reset() { + _next_row_group_plan_idx = 0; + reset_current_row_group(); +} + +void ParquetScanScheduler::reset_current_row_group() { + _current_row_group.reset(); + _current_predicate_columns.clear(); + _current_non_predicate_columns.clear(); + _current_row_group_rows = 0; + _current_row_group_rows_read = 0; + _current_row_group_first_row = 0; + _current_selected_ranges.clear(); + _current_range_idx = 0; + _current_range_rows_read = 0; +} + +Status ParquetScanScheduler::open_next_row_group( + ParquetFileContext& file_context, + const std::vector>& file_schema, + const format::FileScanRequest& request, bool* has_row_group) { + *has_row_group = false; + if (_next_row_group_plan_idx >= _row_group_plans.size()) { + return Status::OK(); + } + const RowGroupReadPlan& row_group_plan = _row_group_plans[_next_row_group_plan_idx++]; + const int row_group_idx = row_group_plan.row_group_id; + try { + _current_row_group = file_context.file_reader->RowGroup(row_group_idx); + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to open parquet row group {}: {}", row_group_idx, + e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to open parquet row group {}: {}", row_group_idx, + e.what()); + } + + auto row_group_metadata = file_context.metadata->RowGroup(row_group_idx); + DORIS_CHECK(row_group_metadata != nullptr); + _current_row_group_rows = row_group_metadata->num_rows(); + DORIS_CHECK(_current_row_group_rows == row_group_plan.row_group_rows); + DORIS_CHECK(_current_row_group_rows > 0); + DORIS_CHECK(!row_group_plan.selected_ranges.empty()); + _current_row_group_first_row = row_group_plan.first_file_row; + _current_row_group_rows_read = 0; + _current_selected_ranges = row_group_plan.selected_ranges; + _current_range_idx = 0; + _current_range_rows_read = 0; + _current_predicate_columns.clear(); + _current_non_predicate_columns.clear(); + + ParquetColumnReaderFactory column_reader_factory( + _current_row_group, file_context.schema->num_columns(), &row_group_plan.page_skip_plans, + _page_skip_profile, _timezone, _enable_strict_mode, + _scan_profile.column_reader_profile); + for (const auto& col : request.predicate_columns) { + const auto local_id = col.local_id(); + if (local_id == format::ROW_POSITION_COLUMN_ID) { + _current_predicate_columns[local_id] = + column_reader_factory.create_row_position_column_reader( + _current_row_group_first_row); + continue; + } + if (local_id == format::GLOBAL_ROWID_COLUMN_ID) { + DORIS_CHECK(_global_rowid_context.has_value()); + _current_predicate_columns[local_id] = + column_reader_factory.create_global_rowid_column_reader( + *_global_rowid_context, _current_row_group_first_row); + continue; + } + + DORIS_CHECK(local_id >= 0 && local_id < static_cast(file_schema.size())); + const auto& column_schema = file_schema[local_id]; + DORIS_CHECK(column_schema != nullptr); + std::unique_ptr column_reader; + RETURN_IF_ERROR(column_reader_factory.create(*column_schema, &col, &column_reader)); + _current_predicate_columns[local_id] = std::move(column_reader); + } + for (const auto& col : request.non_predicate_columns) { + const auto local_id = col.local_id(); + if (local_id == format::ROW_POSITION_COLUMN_ID) { + _current_non_predicate_columns[local_id] = + column_reader_factory.create_row_position_column_reader( + _current_row_group_first_row); + continue; + } + if (local_id == format::GLOBAL_ROWID_COLUMN_ID) { + DORIS_CHECK(_global_rowid_context.has_value()); + _current_non_predicate_columns[local_id] = + column_reader_factory.create_global_rowid_column_reader( + *_global_rowid_context, _current_row_group_first_row); + continue; + } + DORIS_CHECK(local_id >= 0 && local_id < static_cast(file_schema.size())); + const auto& column_schema = file_schema[local_id]; + DORIS_CHECK(column_schema != nullptr); + std::unique_ptr column_reader; + RETURN_IF_ERROR(column_reader_factory.create(*column_schema, &col, &column_reader)); + _current_non_predicate_columns[local_id] = std::move(column_reader); + } + *has_row_group = true; + return Status::OK(); +} + +Status ParquetScanScheduler::skip_current_row_group_rows(int64_t rows) { + DORIS_CHECK(rows >= 0); + if (rows == 0) { + return Status::OK(); + } + if (_scan_profile.range_gap_skipped_rows != nullptr) { + COUNTER_UPDATE(_scan_profile.range_gap_skipped_rows, rows); + } + for (const auto& column_reader : _current_predicate_columns | std::views::values) { + RETURN_IF_ERROR(column_reader->skip(rows)); + } + for (const auto& column_reader : _current_non_predicate_columns | std::views::values) { + RETURN_IF_ERROR(column_reader->skip(rows)); + } + _current_row_group_rows_read += rows; + return Status::OK(); +} + +Status ParquetScanScheduler::read_filter_columns(int64_t batch_rows, + const format::FileScanRequest& request, + Block* file_block, SelectionVector* selection, + uint16_t* selected_rows, + int64_t* conjunct_filtered_rows) { + if (!request.conjuncts.empty() || !request.delete_conjuncts.empty()) { + selection->resize(static_cast(batch_rows)); + } + for (const auto& [fid, column_reader] : _current_predicate_columns) { + auto position_it = request.local_positions.find(format::LocalColumnId(fid)); + DORIS_CHECK(position_it != request.local_positions.end()); + const auto block_position = position_it->second.value(); + DCHECK(remove_nullable(column_reader->type()) + ->equals(*remove_nullable(file_block->get_by_position(block_position).type))) + << column_reader->type()->get_name() << " " + << file_block->get_by_position(block_position).type->get_name() << " " + << column_reader->name() << " " << file_block->get_by_position(block_position).name; + auto column = file_block->get_by_position(block_position).column->assert_mutable(); + int64_t column_rows = 0; + { + SCOPED_TIMER(_scan_profile.column_read_time); + RETURN_IF_ERROR(column_reader->read(batch_rows, column, &column_rows)); + } + if (column_rows != batch_rows) { + return Status::Corruption("Parquet filter column {} returned {} rows, expected {} rows", + column_reader->name(), column_rows, batch_rows); + } + file_block->replace_by_position(block_position, std::move(column)); + } + if (_scan_profile.predicate_filter_time == nullptr) { + return execute_batch_filters(request, batch_rows, file_block, selection, selected_rows, + conjunct_filtered_rows); + } + SCOPED_TIMER(_scan_profile.predicate_filter_time); + return execute_batch_filters(request, batch_rows, file_block, selection, selected_rows, + conjunct_filtered_rows); +} + +Status ParquetScanScheduler::read_current_row_group_batch(int64_t batch_rows, + const format::FileScanRequest& request, + int64_t batch_first_file_row, + Block* file_block, size_t* rows) { + if (_scan_profile.total_batches != nullptr) { + COUNTER_UPDATE(_scan_profile.total_batches, 1); + } + if (_scan_profile.raw_rows_read != nullptr) { + COUNTER_UPDATE(_scan_profile.raw_rows_read, batch_rows); + } + if (_current_predicate_columns.empty() && _current_non_predicate_columns.empty()) { + *rows = static_cast(batch_rows); + if (_scan_profile.selected_rows != nullptr) { + COUNTER_UPDATE(_scan_profile.selected_rows, batch_rows); + } + return Status::OK(); + } + SelectionVector selection; + DORIS_CHECK(batch_rows <= std::numeric_limits::max()); + uint16_t selected_rows = static_cast(batch_rows); + int64_t conjunct_filtered_rows = 0; + RETURN_IF_ERROR(read_filter_columns(batch_rows, request, file_block, &selection, &selected_rows, + &conjunct_filtered_rows)); + _predicate_filtered_rows += conjunct_filtered_rows; + mark_condition_cache_granules(selection, selected_rows, batch_first_file_row); + + const bool need_filter_output = selected_rows != batch_rows; + if (_scan_profile.selected_rows != nullptr) { + COUNTER_UPDATE(_scan_profile.selected_rows, selected_rows); + } + if (_scan_profile.rows_filtered_by_conjunct != nullptr) { + COUNTER_UPDATE(_scan_profile.rows_filtered_by_conjunct, conjunct_filtered_rows); + } + if (!_current_non_predicate_columns.empty() && + _scan_profile.lazy_read_filtered_rows != nullptr) { + COUNTER_UPDATE(_scan_profile.lazy_read_filtered_rows, batch_rows - selected_rows); + } + if (selected_rows == 0 && _scan_profile.empty_selection_batches != nullptr) { + COUNTER_UPDATE(_scan_profile.empty_selection_batches, 1); + } + if (need_filter_output) { + IColumn::Filter output_filter = selection_to_filter(selection, selected_rows, batch_rows); + for (const auto& col : request.predicate_columns) { + auto position_it = request.local_positions.find(col.column_id()); + DORIS_CHECK(position_it != request.local_positions.end()); + const auto block_position = position_it->second.value(); + RETURN_IF_CATCH_EXCEPTION(file_block->replace_by_position( + block_position, file_block->get_by_position(block_position) + .column->filter(output_filter, selected_rows))); + } + } + + { + SCOPED_TIMER(_scan_profile.column_read_time); + for (const auto& [fid, column_reader] : _current_non_predicate_columns) { + auto position_it = request.local_positions.find(format::LocalColumnId(fid)); + DORIS_CHECK(position_it != request.local_positions.end()); + const auto block_position = position_it->second.value(); + auto column = file_block->get_by_position(block_position).column->assert_mutable(); + DCHECK_EQ(file_block->get_by_position(block_position).type->get_primitive_type(), + column_reader->type()->get_primitive_type()) + << type_to_string(file_block->get_by_position(block_position) + .type->get_primitive_type()) + << " " << type_to_string(column_reader->type()->get_primitive_type()) << " " + << column_reader->name() << " " << fid << " " << block_position; + if (need_filter_output) { + [[maybe_unused]] auto old_size = column->size(); + RETURN_IF_ERROR( + column_reader->select(selection, selected_rows, batch_rows, column)); + if (column->size() != old_size + selected_rows) { + return Status::Corruption( + "Parquet selected output column {} returned {} rows, expected {} rows", + column_reader->name(), column->size(), old_size + selected_rows); + } + } else { + int64_t column_rows = 0; + RETURN_IF_ERROR(column_reader->read(batch_rows, column, &column_rows)); + if (column_rows != batch_rows) { + return Status::Corruption( + "Parquet output column {} returned {} rows, expected {} rows", + column_reader->name(), column_rows, batch_rows); + } + } + file_block->replace_by_position(block_position, std::move(column)); + } + } + *rows = static_cast(selected_rows); + return Status::OK(); +} + +void ParquetScanScheduler::mark_condition_cache_granules(const SelectionVector& selection, + uint16_t selected_rows, + int64_t batch_first_file_row) { + if (!_condition_cache_ctx || _condition_cache_ctx->is_hit || + !_condition_cache_ctx->filter_result) { + return; + } + auto& cache = *_condition_cache_ctx->filter_result; + for (uint16_t selection_idx = 0; selection_idx < selected_rows; ++selection_idx) { + const int64_t file_row = batch_first_file_row + selection.get_index(selection_idx); + const int64_t granule = file_row / ConditionCacheContext::GRANULE_SIZE; + const int64_t cache_idx = granule - _condition_cache_ctx->base_granule; + if (cache_idx >= 0 && static_cast(cache_idx) < cache.size()) { + cache[static_cast(cache_idx)] = true; + } + } +} + +Status ParquetScanScheduler::read_next_batch( + ParquetFileContext& file_context, + const std::vector>& file_schema, + const format::FileScanRequest& request, Block* file_block, size_t* rows, bool* eof) { + *rows = 0; + while (true) { + if (_current_row_group == nullptr) { + bool has_row_group = false; + RETURN_IF_ERROR( + open_next_row_group(file_context, file_schema, request, &has_row_group)); + if (!has_row_group) { + *eof = true; + return Status::OK(); + } + } + + if (_current_range_idx >= _current_selected_ranges.size()) { + // Current row group finished, try next row group. + reset_current_row_group(); + continue; + } + + const RowRange& current_range = _current_selected_ranges[_current_range_idx]; + DORIS_CHECK(current_range.start >= 0); + DORIS_CHECK(current_range.length > 0); + DORIS_CHECK(current_range.start + current_range.length <= _current_row_group_rows); + + if (_current_row_group_rows_read < current_range.start) { + // Skip filtered rows according to row group level pruning. + RETURN_IF_ERROR(skip_current_row_group_rows(current_range.start - + _current_row_group_rows_read)); + } + DORIS_CHECK(_current_row_group_rows_read == current_range.start + _current_range_rows_read); + const int64_t remaining_rows = current_range.length - _current_range_rows_read; + if (remaining_rows <= 0) { + // Current range finished, try next range in the same row group. + ++_current_range_idx; + _current_range_rows_read = 0; + continue; + } + + const int64_t batch_rows = std::min(_batch_size, remaining_rows); + const int64_t physical_rows_read = batch_rows; + const int64_t batch_first_file_row = + _current_row_group_first_row + _current_row_group_rows_read; + RETURN_IF_ERROR(read_current_row_group_batch(batch_rows, request, batch_first_file_row, + file_block, rows)); + _current_row_group_rows_read += physical_rows_read; + _current_range_rows_read += physical_rows_read; + if (_current_range_rows_read >= current_range.length) { + ++_current_range_idx; + _current_range_rows_read = 0; + } + if (*rows == 0) { + continue; + } + *eof = false; + return Status::OK(); + } +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_scan.h b/be/src/format_v2/parquet/parquet_scan.h new file mode 100644 index 00000000000000..ca3c665a2e95f8 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_scan.h @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/column/column.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_profile.h" +#include "format_v2/parquet/parquet_statistics.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/parquet/selection_vector.h" +#include "runtime/runtime_profile.h" +#include "storage/segment/condition_cache.h" + +namespace parquet { +class FileMetaData; +class ParquetFileReader; +class RowGroupReader; +} // namespace parquet + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { +class Block; + +namespace format { +struct FileScanRequest; +} // namespace format +} // namespace doris + +namespace doris::format::parquet { + +struct ParquetFileContext; +struct ParquetColumnSchema; + +// ============================================================================ +// ============================================================================ + +struct ParquetScanRange { + int64_t start_offset = 0; + int64_t size = -1; // -1 means read the whole file + int64_t file_size = -1; // -1 means unknown +}; + +struct RowGroupReadPlan { + int row_group_id = -1; // row group id + int64_t first_file_row = 0; // first file row for this row group (0-based) + int64_t row_group_rows = 0; // row count of this row group + std::vector selected_ranges; // row ranges to read after page-index pruning + std::map + page_skip_plans; // leaf_column_id -> data pages that can be skipped completely +}; + +struct RowGroupScanPlan { + std::vector row_groups; // row groups selected after pruning + ParquetPruningStats pruning_stats; // pruning statistics +}; + +// ============================================================================ +// ============================================================================ + +Status plan_parquet_row_groups(const ::parquet::FileMetaData& metadata, + ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, + const ParquetScanRange& scan_range, bool enable_bloom_filter, + RowGroupScanPlan* plan, const cctz::time_zone* timezone = nullptr); + +IColumn::Filter selection_to_filter(const SelectionVector& selection, uint16_t selected_rows, + int64_t batch_rows); + +Status execute_batch_filters(const format::FileScanRequest& request, int64_t batch_rows, + Block* file_block, SelectionVector* selection, uint16_t* selected_rows, + int64_t* conjunct_filtered_rows = nullptr); + +// ============================================================================ +// ============================================================================ +// while true: +// 3. read_current_row_group_batch(batch_rows) +// ============================================================================ +class ParquetScanScheduler { +public: + static constexpr int64_t DEFAULT_READ_BATCH_SIZE = 4096; + + void set_plan(RowGroupScanPlan plan); + void set_page_skip_profile(ParquetPageSkipProfile page_skip_profile) { + _page_skip_profile = page_skip_profile; + } + void set_scan_profile(ParquetScanProfile scan_profile) { _scan_profile = scan_profile; } + void set_global_rowid_context(std::optional context) { + _global_rowid_context = context; + } + void set_condition_cache_context(std::shared_ptr ctx); + void set_timezone(const cctz::time_zone* timezone) { _timezone = timezone; } + void set_enable_strict_mode(bool enable_strict_mode) { + _enable_strict_mode = enable_strict_mode; + } + // Upper scanner owns adaptive memory feedback; scheduler only applies the current row cap when + // splitting selected row ranges into physical read batches. + void set_batch_size(size_t batch_size) { + _batch_size = batch_size == 0 ? 1 : static_cast(batch_size); + } + void reset(); + bool empty() const { return _row_group_plans.empty(); } + int64_t condition_cache_filtered_rows() const { return _condition_cache_filtered_rows; } + int64_t predicate_filtered_rows() const { return _predicate_filtered_rows; } + + Status read_next_batch(ParquetFileContext& file_context, + const std::vector>& file_schema, + const format::FileScanRequest& request, Block* file_block, size_t* rows, + bool* eof); + +private: + void reset_current_row_group(); + + Status open_next_row_group(ParquetFileContext& file_context, + const std::vector>& file_schema, + const format::FileScanRequest& request, bool* has_row_group); + + Status skip_current_row_group_rows(int64_t rows); + + Status read_filter_columns(int64_t batch_rows, const format::FileScanRequest& request, + Block* file_block, SelectionVector* selection, + uint16_t* selected_rows, int64_t* conjunct_filtered_rows); + + Status read_current_row_group_batch(int64_t batch_rows, const format::FileScanRequest& request, + int64_t batch_first_file_row, Block* file_block, + size_t* rows); + + void mark_condition_cache_granules(const SelectionVector& selection, uint16_t selected_rows, + int64_t batch_first_file_row); + + std::vector _row_group_plans; // row group queue to scan + size_t _next_row_group_plan_idx = 0; // index of the next row group to process + + std::shared_ptr<::parquet::RowGroupReader> _current_row_group; // Arrow RowGroup reader + std::map> + _current_predicate_columns; // predicate ColumnReaders + std::map> + _current_non_predicate_columns; // non-predicate ColumnReaders + int64_t _current_row_group_rows = 0; // current row group row count + int64_t _current_row_group_rows_read = 0; // rows read in the current row group (cursor) + int64_t _current_row_group_first_row = 0; // first file row of the current row group + std::vector + _current_selected_ranges; // selected ranges for the current row group after page-index pruning + size_t _current_range_idx = 0; // current selected_range index + int64_t _current_range_rows_read = 0; // rows read in the current range + + ParquetPageSkipProfile _page_skip_profile; + ParquetScanProfile _scan_profile; + std::optional _global_rowid_context; + const cctz::time_zone* _timezone = nullptr; + bool _enable_strict_mode = false; + int64_t _batch_size = DEFAULT_READ_BATCH_SIZE; + std::shared_ptr _condition_cache_ctx; + int64_t _condition_cache_filtered_rows = 0; + int64_t _predicate_filtered_rows = 0; +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_statistics.cpp b/be/src/format_v2/parquet/parquet_statistics.cpp new file mode 100644 index 00000000000000..1207aecd8e4877 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_statistics.cpp @@ -0,0 +1,1303 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_statistics.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/config.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type_serde/data_type_serde.h" +#include "core/field.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "runtime/runtime_profile.h" +#include "storage/index/zone_map/zone_map_index.h" +#include "storage/predicate/accept_null_predicate.h" +#include "storage/predicate/column_predicate.h" + +namespace doris::format::parquet { + +namespace { + +enum class ParquetRowGroupPruneReason { + NONE, // cannot prune; must read + STATISTICS, // excluded by min/max statistics + DICTIONARY, // excluded by dictionary + BLOOM_FILTER, // excluded by bloom filter +}; + +PrimitiveType physical_filter_type(const ParquetColumnSchema& column_schema) { + if (column_schema.type == nullptr) { + return INVALID_TYPE; + } + switch (remove_nullable(column_schema.type)->get_primitive_type()) { + case TYPE_BOOLEAN: + case TYPE_INT: + case TYPE_BIGINT: + case TYPE_FLOAT: + case TYPE_DOUBLE: + case TYPE_STRING: + return remove_nullable(column_schema.type)->get_primitive_type(); + default: + return INVALID_TYPE; + } +} + +DecodedTimeUnit decoded_time_unit(ParquetTimeUnit time_unit) { + switch (time_unit) { + case ParquetTimeUnit::MILLIS: + return DecodedTimeUnit::MILLIS; + case ParquetTimeUnit::MICROS: + return DecodedTimeUnit::MICROS; + case ParquetTimeUnit::NANOS: + return DecodedTimeUnit::NANOS; + default: + return DecodedTimeUnit::UNKNOWN; + } +} + +Status read_decoded_field(const ParquetColumnSchema& column_schema, DecodedColumnView view, + Field* field, const cctz::time_zone* timezone) { + DORIS_CHECK(column_schema.type != nullptr); + DORIS_CHECK(field != nullptr); + constexpr uint8_t not_null = 0; + view.row_count = 1; + view.null_map = ¬_null; + view.time_unit = decoded_time_unit(column_schema.type_descriptor.time_unit); + view.logical_integer_bit_width = column_schema.type_descriptor.integer_bit_width; + view.logical_integer_is_signed = !column_schema.type_descriptor.is_unsigned_integer; + view.decimal_precision = column_schema.type_descriptor.decimal_precision; + view.decimal_scale = column_schema.type_descriptor.decimal_scale; + view.fixed_length = column_schema.type_descriptor.fixed_length; + view.timestamp_is_adjusted_to_utc = column_schema.type_descriptor.timestamp_is_adjusted_to_utc; + view.timezone = timezone; + return column_schema.type->get_serde()->read_field_from_decoded_value(*column_schema.type, + field, view); +} + +template +bool set_decoded_field(const ParquetColumnSchema& column_schema, DecodedValueKind value_kind, + const NativeType& value, Field* field, const cctz::time_zone* timezone) { + DecodedColumnView view; + view.value_kind = value_kind; + view.values = reinterpret_cast(&value); + return read_decoded_field(column_schema, view, field, timezone).ok(); +} + +template +bool set_decoded_min_max(const std::shared_ptr<::parquet::Statistics>& statistics, + const ParquetColumnSchema& column_schema, DecodedValueKind value_kind, + ParquetColumnStatistics* column_statistics, + const cctz::time_zone* timezone) { + auto typed_statistics = + std::static_pointer_cast<::parquet::TypedStatistics>(statistics); + if (!set_decoded_field(column_schema, value_kind, typed_statistics->min(), + &column_statistics->min_value, timezone) || + !set_decoded_field(column_schema, value_kind, typed_statistics->max(), + &column_statistics->max_value, timezone)) { + return false; + } + return true; +} + +bool set_decoded_binary_field(const ParquetColumnSchema& column_schema, DecodedValueKind value_kind, + const StringRef& value, Field* field, + const cctz::time_zone* timezone) { + std::vector binary_values {value}; + DecodedColumnView view; + view.value_kind = value_kind; + view.binary_values = &binary_values; + return read_decoded_field(column_schema, view, field, timezone).ok(); +} + +bool set_string_min_max(const std::shared_ptr<::parquet::Statistics>& statistics, + const ParquetColumnSchema& column_schema, + ParquetColumnStatistics* column_statistics, + const cctz::time_zone* timezone) { + switch (statistics->physical_type()) { + case ::parquet::Type::BYTE_ARRAY: { + auto typed_statistics = + std::static_pointer_cast<::parquet::TypedStatistics<::parquet::ByteArrayType>>( + statistics); + const auto min = ::parquet::ByteArrayToString(typed_statistics->min()); + const auto max = ::parquet::ByteArrayToString(typed_statistics->max()); + if (!set_decoded_binary_field(column_schema, DecodedValueKind::BINARY, + StringRef(min.data(), min.size()), + &column_statistics->min_value, timezone) || + !set_decoded_binary_field(column_schema, DecodedValueKind::BINARY, + StringRef(max.data(), max.size()), + &column_statistics->max_value, timezone)) { + return false; + } + return true; + } + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: { + if (column_schema.descriptor == nullptr || column_schema.descriptor->type_length() <= 0) { + return false; + } + auto typed_statistics = + std::static_pointer_cast<::parquet::TypedStatistics<::parquet::FLBAType>>( + statistics); + const int type_length = column_schema.descriptor->type_length(); + const std::string min(reinterpret_cast(typed_statistics->min().ptr), + type_length); + const std::string max(reinterpret_cast(typed_statistics->max().ptr), + type_length); + if (!set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY, + StringRef(min.data(), min.size()), + &column_statistics->min_value, timezone) || + !set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY, + StringRef(max.data(), max.size()), + &column_statistics->max_value, timezone)) { + return false; + } + return true; + } + default: + return false; + } +} + +bool is_null_only_predicate(const ColumnPredicate& predicate) { + return predicate.type() == PredicateType::IS_NULL || + predicate.type() == PredicateType::IS_NOT_NULL; +} + +bool is_supported_dictionary_predicate(const ColumnPredicate& predicate) { + switch (predicate.type()) { + case PredicateType::EQ: + case PredicateType::IN_LIST: + return true; + default: + return false; + } +} + +bool is_bloom_filter_prunable_predicate(const ColumnPredicate& predicate) { + if (dynamic_cast(&predicate) != nullptr || + is_null_only_predicate(predicate)) { + return false; + } + return predicate.can_do_bloom_filter(false); +} + +template +T load_predicate_value(const char* data) { + T value; + memcpy(&value, data, sizeof(T)); + return value; +} + +class ArrowParquetBloomFilterAdapter final : public segment_v2::BloomFilter { +public: + ArrowParquetBloomFilterAdapter(const ParquetColumnSchema& column_schema, + const ::parquet::BloomFilter& bloom_filter) + : _column_schema(column_schema), _bloom_filter(bloom_filter) {} + + void add_bytes(const char* buf, size_t size) override { DORIS_CHECK(false); } + + bool test_bytes(const char* buf, size_t size) const override { + if (buf == nullptr) { + return true; + } + switch (physical_filter_type(_column_schema)) { + case TYPE_BOOLEAN: + return test_boolean(buf, size); + case TYPE_INT: + return test_int32(buf, size); + case TYPE_BIGINT: + return test_int64(buf, size); + case TYPE_FLOAT: + return test_float(buf, size); + case TYPE_DOUBLE: + return test_double(buf, size); + case TYPE_STRING: + return test_string(buf, size); + default: + return true; + } + } + + void set_has_null(bool has_null) override { DORIS_CHECK(!has_null); } + bool has_null() const override { return false; } + void add_hash(uint64_t hash) override { DORIS_CHECK(false); } + bool test_hash(uint64_t hash) const override { return _bloom_filter.FindHash(hash); } + +private: + bool test_boolean(const char* buf, size_t size) const { + if (size == sizeof(bool)) { + const int32_t value = load_predicate_value(buf) ? 1 : 0; + return _bloom_filter.FindHash(_bloom_filter.Hash(value)); + } + if (size == sizeof(int32_t)) { + const int32_t value = load_predicate_value(buf); + return _bloom_filter.FindHash(_bloom_filter.Hash(value != 0 ? 1 : 0)); + } + return true; + } + + bool test_int32(const char* buf, size_t size) const { + if (size == sizeof(int8_t)) { + return find_int32(static_cast(load_predicate_value(buf))); + } + if (size == sizeof(int16_t)) { + return find_int32(static_cast(load_predicate_value(buf))); + } + if (size == sizeof(int32_t)) { + return find_int32(load_predicate_value(buf)); + } + return true; + } + + bool test_int64(const char* buf, size_t size) const { + if (size != sizeof(int64_t)) { + return true; + } + const int64_t value = load_predicate_value(buf); + return _bloom_filter.FindHash(_bloom_filter.Hash(value)); + } + + bool test_float(const char* buf, size_t size) const { + if (size != sizeof(float)) { + return true; + } + const float value = load_predicate_value(buf); + return _bloom_filter.FindHash(_bloom_filter.Hash(value)); + } + + bool test_double(const char* buf, size_t size) const { + if (size != sizeof(double)) { + return true; + } + const double value = load_predicate_value(buf); + return _bloom_filter.FindHash(_bloom_filter.Hash(value)); + } + + bool test_string(const char* buf, size_t size) const { + ::parquet::ByteArray value(static_cast(size), + reinterpret_cast(buf)); + return _bloom_filter.FindHash(_bloom_filter.Hash(&value)); + } + + bool find_int32(int32_t value) const { + return _bloom_filter.FindHash(_bloom_filter.Hash(value)); + } + + const ParquetColumnSchema& _column_schema; + const ::parquet::BloomFilter& _bloom_filter; +}; + +const ParquetColumnSchema* resolve_predicate_leaf_schema( + const std::vector>& schema, + const format::FileColumnPredicateFilter& column_filter); + +bool bloom_filter_supported(const ParquetColumnSchema& column_schema) { + switch (physical_filter_type(column_schema)) { + case TYPE_BOOLEAN: + case TYPE_INT: + case TYPE_BIGINT: + case TYPE_FLOAT: + case TYPE_DOUBLE: + case TYPE_STRING: + return true; + default: + return false; + } +} + +bool bloom_filter_excludes(const ParquetColumnSchema& column_schema, + const format::FileColumnPredicateFilter& column_filter, + const ::parquet::BloomFilter& bloom_filter) { + if (!bloom_filter_supported(column_schema)) { + return false; + } + ArrowParquetBloomFilterAdapter adapter(column_schema, bloom_filter); + for (const auto& column_predicate : column_filter.predicates) { + if (column_predicate == nullptr || !is_bloom_filter_prunable_predicate(*column_predicate)) { + return false; + } + if (!column_predicate->evaluate_and(&adapter)) { + return true; + } + } + return false; +} + +struct RowGroupBloomFilterCache { + ::parquet::BloomFilterReader* bloom_filter_reader = nullptr; + std::map> column_bloom_filters; + std::set loaded_columns; + + ::parquet::BloomFilter* get(int row_group_idx, int leaf_column_id, + ParquetPruningStats* pruning_stats) { + if (bloom_filter_reader == nullptr || leaf_column_id < 0) { + return nullptr; + } + if (loaded_columns.find(leaf_column_id) == loaded_columns.end()) { + loaded_columns.insert(leaf_column_id); + try { + std::shared_ptr<::parquet::RowGroupBloomFilterReader> row_group_reader; + if (pruning_stats != nullptr) { + SCOPED_RAW_TIMER(&pruning_stats->bloom_filter_read_time); + row_group_reader = bloom_filter_reader->RowGroup(row_group_idx); + if (row_group_reader != nullptr) { + column_bloom_filters[leaf_column_id] = + row_group_reader->GetColumnBloomFilter(leaf_column_id); + } + } else { + row_group_reader = bloom_filter_reader->RowGroup(row_group_idx); + if (row_group_reader != nullptr) { + column_bloom_filters[leaf_column_id] = + row_group_reader->GetColumnBloomFilter(leaf_column_id); + } + } + } catch (const ::parquet::ParquetException&) { + return nullptr; + } catch (const std::exception&) { + return nullptr; + } + } + auto it = column_bloom_filters.find(leaf_column_id); + return it == column_bloom_filters.end() ? nullptr : it->second.get(); + } +}; + +ParquetRowGroupPruneReason bloom_filter_prune_reason( + int row_group_idx, const std::vector>& schema, + const format::FileColumnPredicateFilter& column_filter, + RowGroupBloomFilterCache* bloom_filter_cache, ParquetPruningStats* pruning_stats) { + if (bloom_filter_cache == nullptr || column_filter.predicates.empty()) { + return ParquetRowGroupPruneReason::NONE; + } + const auto* column_schema = resolve_predicate_leaf_schema(schema, column_filter); + if (column_schema == nullptr || !bloom_filter_supported(*column_schema)) { + return ParquetRowGroupPruneReason::NONE; + } + for (const auto& column_predicate : column_filter.predicates) { + if (column_predicate == nullptr || !is_bloom_filter_prunable_predicate(*column_predicate)) { + return ParquetRowGroupPruneReason::NONE; + } + } + auto* bloom_filter = + bloom_filter_cache->get(row_group_idx, column_schema->leaf_column_id, pruning_stats); + if (bloom_filter == nullptr) { + return ParquetRowGroupPruneReason::NONE; + } + return bloom_filter_excludes(*column_schema, column_filter, *bloom_filter) + ? ParquetRowGroupPruneReason::BLOOM_FILTER + : ParquetRowGroupPruneReason::NONE; +} + +bool is_dictionary_data_encoding(::parquet::Encoding::type encoding) { + return encoding == ::parquet::Encoding::PLAIN_DICTIONARY || + encoding == ::parquet::Encoding::RLE_DICTIONARY; +} + +bool is_level_encoding(::parquet::Encoding::type encoding) { + return encoding == ::parquet::Encoding::RLE || encoding == ::parquet::Encoding::BIT_PACKED; +} + +bool is_data_page_type(::parquet::PageType::type page_type) { + return page_type == ::parquet::PageType::DATA_PAGE || + page_type == ::parquet::PageType::DATA_PAGE_V2; +} + +bool is_dictionary_encoded_chunk(const ::parquet::ColumnChunkMetaData& column_metadata) { + if (!column_metadata.has_dictionary_page()) { + return false; + } + + const auto& encoding_stats = column_metadata.encoding_stats(); + if (!encoding_stats.empty()) { + bool has_dictionary_data_page = false; + for (const auto& encoding_stat : encoding_stats) { + if (!is_data_page_type(encoding_stat.page_type) || encoding_stat.count <= 0) { + continue; + } + if (!is_dictionary_data_encoding(encoding_stat.encoding)) { + return false; + } + has_dictionary_data_page = true; + } + return has_dictionary_data_page; + } + + bool has_dictionary_encoding = false; + for (const auto encoding : column_metadata.encodings()) { + if (is_dictionary_data_encoding(encoding)) { + has_dictionary_encoding = true; + continue; + } + if (!is_level_encoding(encoding)) { + return false; + } + } + return has_dictionary_encoding; +} + +bool supports_dictionary_pruning(const ParquetColumnSchema& column_schema, + const ::parquet::ColumnChunkMetaData& column_metadata, + const format::FileColumnPredicateFilter& column_filter) { + if (column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE || + column_schema.descriptor == nullptr || column_schema.type == nullptr) { + return false; + } + if (!column_schema.type_descriptor.is_string_like) { + return false; + } + if (column_metadata.type() != ::parquet::Type::BYTE_ARRAY && + column_metadata.type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) { + return false; + } + for (const auto& column_predicate : column_filter.predicates) { + if (column_predicate == nullptr || !is_supported_dictionary_predicate(*column_predicate)) { + return false; + } + } + return true; +} + +struct OwnedDictionaryWords { + std::vector values; + std::vector refs; + + void clear() { + values.clear(); + refs.clear(); + } + + void build_refs() { + refs.reserve(values.size()); + for (const auto& value : values) { + refs.emplace_back(value.data(), value.size()); + } + } +}; + +bool read_dictionary_words(::parquet::ParquetFileReader* file_reader, int row_group_idx, + int leaf_column_id, const ParquetColumnSchema& column_schema, + OwnedDictionaryWords* dict_words) { + DORIS_CHECK(dict_words != nullptr); + dict_words->clear(); + if (file_reader == nullptr || leaf_column_id < 0) { + return false; + } + + auto row_group_reader = file_reader->RowGroup(row_group_idx); + if (row_group_reader == nullptr) { + return false; + } + auto page_reader = row_group_reader->GetColumnPageReader(leaf_column_id); + if (page_reader == nullptr) { + return false; + } + + std::shared_ptr<::parquet::Page> page; + try { + page = page_reader->NextPage(); + } catch (const ::parquet::ParquetException&) { + return false; + } catch (const std::exception&) { + return false; + } + if (page == nullptr || page->type() != ::parquet::PageType::DICTIONARY_PAGE) { + return false; + } + const auto* dictionary_page = static_cast(page.get()); + if (dictionary_page->encoding() != ::parquet::Encoding::PLAIN && + dictionary_page->encoding() != ::parquet::Encoding::PLAIN_DICTIONARY) { + return false; + } + const int32_t dictionary_length = dictionary_page->num_values(); + if (dictionary_length <= 0) { + return false; + } + const auto* dictionary_data = dictionary_page->data(); + const int dictionary_size = dictionary_page->size(); + + dict_words->values.reserve(static_cast(dictionary_length)); + if (column_schema.descriptor->physical_type() == ::parquet::Type::BYTE_ARRAY) { + auto decoder = ::parquet::MakeTypedDecoder<::parquet::ByteArrayType>( + ::parquet::Encoding::PLAIN, column_schema.descriptor); + decoder->SetData(dictionary_length, dictionary_data, dictionary_size); + std::vector<::parquet::ByteArray> byte_array_values(static_cast(dictionary_length)); + if (decoder->Decode(byte_array_values.data(), dictionary_length) != dictionary_length) { + return false; + } + for (int32_t dict_idx = 0; dict_idx < dictionary_length; ++dict_idx) { + dict_words->values.emplace_back( + reinterpret_cast(byte_array_values[dict_idx].ptr), + byte_array_values[dict_idx].len); + } + dict_words->build_refs(); + return true; + } + if (column_schema.descriptor->physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) { + const int type_length = column_schema.descriptor->type_length(); + if (type_length <= 0) { + return false; + } + auto decoder = ::parquet::MakeTypedDecoder<::parquet::FLBAType>(::parquet::Encoding::PLAIN, + column_schema.descriptor); + decoder->SetData(dictionary_length, dictionary_data, dictionary_size); + std::vector<::parquet::FixedLenByteArray> flba_values( + static_cast(dictionary_length)); + if (decoder->Decode(flba_values.data(), dictionary_length) != dictionary_length) { + return false; + } + for (int32_t dict_idx = 0; dict_idx < dictionary_length; ++dict_idx) { + dict_words->values.emplace_back( + reinterpret_cast(flba_values[dict_idx].ptr), type_length); + } + dict_words->build_refs(); + return true; + } + return false; +} + +segment_v2::ZoneMap to_column_predicate_statistics(const ParquetColumnStatistics& statistics) { + segment_v2::ZoneMap predicate_statistics; + predicate_statistics.min_value = statistics.min_value; + predicate_statistics.max_value = statistics.max_value; + predicate_statistics.has_null = statistics.has_null; + predicate_statistics.has_not_null = statistics.has_not_null; + return predicate_statistics; +} + +const ParquetColumnSchema* find_child_schema_by_local_id(const ParquetColumnSchema& column_schema, + int32_t local_id) { + const auto child_it = std::ranges::find_if( + column_schema.children, [&](const std::unique_ptr& child) { + return child != nullptr && child->local_id == local_id; + }); + return child_it == column_schema.children.end() ? nullptr : child_it->get(); +} + +const ParquetColumnSchema* resolve_predicate_leaf_schema( + const std::vector>& schema, + const format::FileColumnPredicateFilter& column_filter) { + const auto file_column_id = column_filter.effective_file_column_id(); + if (!file_column_id.is_valid() || file_column_id.value() >= static_cast(schema.size())) { + return nullptr; + } + const ParquetColumnSchema* column_schema = schema[file_column_id.value()].get(); + if (column_schema == nullptr) { + return nullptr; + } + for (const auto child_local_id : column_filter.effective_file_child_id_path()) { + column_schema = find_child_schema_by_local_id(*column_schema, child_local_id); + if (column_schema == nullptr) { + return nullptr; + } + } + if (column_schema->kind != ParquetColumnSchemaKind::PRIMITIVE || + column_schema->leaf_column_id < 0 || column_schema->max_repetition_level > 0) { + return nullptr; + } + return column_schema; +} + +bool check_statistics(const format::FileColumnPredicateFilter& column_filter, + const ParquetColumnStatistics& statistics) { + if (!statistics.has_any_statistics()) { + return false; + } + + for (const auto& column_predicate : column_filter.predicates) { + if (is_null_only_predicate(*column_predicate)) { + if (!statistics.has_null_count) { + continue; + } + } else if (!statistics.has_any_statistics()) { + continue; + } + if (!column_predicate->evaluate_and(to_column_predicate_statistics(statistics))) { + return true; + } + } + return false; +} + +} // namespace + +ParquetColumnStatistics ParquetStatisticsUtils::TransformColumnStatistics( + const ParquetColumnSchema& column_schema, + const std::shared_ptr<::parquet::Statistics>& statistics, const cctz::time_zone* timezone) { + ParquetColumnStatistics result; + if (statistics == nullptr) { + return result; + } + + result.has_null = statistics->HasNullCount() && statistics->null_count() > 0; + result.has_not_null = statistics->num_values() > 0 || statistics->HasMinMax(); + result.has_null_count = statistics->HasNullCount(); + if (!result.has_not_null || !statistics->HasMinMax()) { + return result; + } + + DORIS_CHECK(column_schema.type != nullptr); + switch (statistics->physical_type()) { + case ::parquet::Type::BOOLEAN: + result.has_min_max = set_decoded_min_max<::parquet::BooleanType>( + statistics, column_schema, DecodedValueKind::BOOL, &result, timezone); + return result; + case ::parquet::Type::INT32: + result.has_min_max = set_decoded_min_max<::parquet::Int32Type>( + statistics, column_schema, decoded_value_kind(column_schema.type_descriptor), + &result, timezone); + return result; + case ::parquet::Type::INT64: + result.has_min_max = set_decoded_min_max<::parquet::Int64Type>( + statistics, column_schema, decoded_value_kind(column_schema.type_descriptor), + &result, timezone); + return result; + case ::parquet::Type::FLOAT: + result.has_min_max = set_decoded_min_max<::parquet::FloatType>( + statistics, column_schema, DecodedValueKind::FLOAT, &result, timezone); + return result; + case ::parquet::Type::DOUBLE: + result.has_min_max = set_decoded_min_max<::parquet::DoubleType>( + statistics, column_schema, DecodedValueKind::DOUBLE, &result, timezone); + return result; + case ::parquet::Type::BYTE_ARRAY: + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: + result.has_min_max = set_string_min_max(statistics, column_schema, &result, timezone); + return result; + default: + return result; + } +} + +namespace { + +ParquetRowGroupPruneReason row_group_prune_reason( + const ::parquet::RowGroupMetaData& row_group, ::parquet::ParquetFileReader* file_reader, + int row_group_idx, const std::vector>& schema, + const format::FileColumnPredicateFilter& column_filter, + RowGroupBloomFilterCache* bloom_filter_cache, ParquetPruningStats* pruning_stats, + const cctz::time_zone* timezone) { + if (column_filter.predicates.empty()) { + return ParquetRowGroupPruneReason::NONE; + } + const auto* column_schema = resolve_predicate_leaf_schema(schema, column_filter); + if (column_schema == nullptr) { + return ParquetRowGroupPruneReason::NONE; + } + DCHECK_LT(column_schema->leaf_column_id, row_group.num_columns()); + auto column_chunk = row_group.ColumnChunk(column_schema->leaf_column_id); + if (column_chunk == nullptr) { + return ParquetRowGroupPruneReason::NONE; + } + if (check_statistics(column_filter, + ParquetStatisticsUtils::TransformColumnStatistics( + *column_schema, column_chunk->statistics(), timezone))) { + return ParquetRowGroupPruneReason::STATISTICS; + } + if (!supports_dictionary_pruning(*column_schema, *column_chunk, column_filter) || + !is_dictionary_encoded_chunk(*column_chunk)) { + return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache, + pruning_stats); + } + OwnedDictionaryWords dict_words; + if (!read_dictionary_words(file_reader, row_group_idx, column_schema->leaf_column_id, + *column_schema, &dict_words)) { + return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache, + pruning_stats); + } + for (const auto& column_predicate : column_filter.predicates) { + if (!column_predicate->evaluate_and(dict_words.refs.data(), dict_words.refs.size())) { + return ParquetRowGroupPruneReason::DICTIONARY; + } + } + return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache, + pruning_stats); +} + +void init_bloom_filter_cache(::parquet::ParquetFileReader* file_reader, bool enable_bloom_filter, + RowGroupBloomFilterCache* bloom_filter_cache) { + DORIS_CHECK(bloom_filter_cache != nullptr); + if (!enable_bloom_filter || file_reader == nullptr) { + return; + } + try { + bloom_filter_cache->bloom_filter_reader = &file_reader->GetBloomFilterReader(); + } catch (const ::parquet::ParquetException&) { + bloom_filter_cache->bloom_filter_reader = nullptr; + } catch (const std::exception&) { + bloom_filter_cache->bloom_filter_reader = nullptr; + } +} + +Status select_row_groups(const ::parquet::FileMetaData& metadata, + ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, + const std::vector* candidate_row_groups, + std::vector* selected_row_groups, bool enable_bloom_filter, + ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) { + int64_t row_group_filter_time_sink = 0; + SCOPED_RAW_TIMER(pruning_stats == nullptr ? &row_group_filter_time_sink + : &pruning_stats->row_group_filter_time); + if (selected_row_groups == nullptr) { + return Status::InvalidArgument("selected_row_groups is null"); + } + selected_row_groups->clear(); + + const int num_row_groups = metadata.num_row_groups(); + if (pruning_stats != nullptr) { + pruning_stats->total_row_groups = num_row_groups; + } + const auto candidate_size = candidate_row_groups == nullptr + ? static_cast(num_row_groups) + : candidate_row_groups->size(); + selected_row_groups->reserve(candidate_size); + for (size_t candidate_idx = 0; candidate_idx < candidate_size; ++candidate_idx) { + const int row_group_idx = candidate_row_groups == nullptr + ? static_cast(candidate_idx) + : (*candidate_row_groups)[candidate_idx]; + DORIS_CHECK(row_group_idx >= 0); + DORIS_CHECK(row_group_idx < num_row_groups); + auto row_group = metadata.RowGroup(row_group_idx); + if (row_group == nullptr) { + selected_row_groups->push_back(row_group_idx); + continue; + } + bool drop = false; + RowGroupBloomFilterCache bloom_filter_cache; + init_bloom_filter_cache(file_reader, enable_bloom_filter, &bloom_filter_cache); + for (const auto& column_filter : request.column_predicate_filters) { + const auto prune_reason = row_group_prune_reason( + *row_group, file_reader, row_group_idx, file_schema, column_filter, + &bloom_filter_cache, pruning_stats, timezone); + if (prune_reason == ParquetRowGroupPruneReason::NONE) { + continue; + } + drop = true; + if (pruning_stats != nullptr) { + pruning_stats->filtered_group_rows += row_group->num_rows(); + if (prune_reason == ParquetRowGroupPruneReason::STATISTICS) { + ++pruning_stats->filtered_row_groups_by_statistics; + } else if (prune_reason == ParquetRowGroupPruneReason::DICTIONARY) { + ++pruning_stats->filtered_row_groups_by_dictionary; + } else if (prune_reason == ParquetRowGroupPruneReason::BLOOM_FILTER) { + ++pruning_stats->filtered_row_groups_by_bloom_filter; + } + break; + } + break; + } + if (drop) { + continue; + } + selected_row_groups->push_back(row_group_idx); + } + return Status::OK(); +} + +} // namespace + +bool ParquetStatisticsUtils::BloomFilterExcludes( + const ParquetColumnSchema& column_schema, + const format::FileColumnPredicateFilter& column_filter, + const ::parquet::BloomFilter& bloom_filter) { + return bloom_filter_excludes(column_schema, column_filter, bloom_filter); +} + +Status select_row_groups_by_statistics( + const ::parquet::FileMetaData& metadata, ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, const std::vector* candidate_row_groups, + std::vector* selected_row_groups, bool enable_bloom_filter, + ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) { + return select_row_groups(metadata, file_reader, file_schema, request, candidate_row_groups, + selected_row_groups, enable_bloom_filter, pruning_stats, timezone); +} + +namespace { + +template +bool set_page_decoded_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index, + const ParquetColumnSchema& column_schema, size_t page_idx, + DecodedValueKind value_kind, ParquetColumnStatistics* page_statistics, + const cctz::time_zone* timezone) { + const auto typed_index = + std::static_pointer_cast<::parquet::TypedColumnIndex>(column_index); + if (page_idx >= typed_index->min_values().size() || + page_idx >= typed_index->max_values().size()) { + return false; + } + if (!set_decoded_field(column_schema, value_kind, typed_index->min_values()[page_idx], + &page_statistics->min_value, timezone) || + !set_decoded_field(column_schema, value_kind, typed_index->max_values()[page_idx], + &page_statistics->max_value, timezone)) { + return false; + } + page_statistics->has_min_max = true; + return true; +} + +bool set_page_string_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index, + const ParquetColumnSchema& column_schema, size_t page_idx, + ParquetColumnStatistics* page_statistics, + const cctz::time_zone* timezone) { + switch (column_schema.descriptor->physical_type()) { + case ::parquet::Type::BYTE_ARRAY: { + const auto typed_index = + std::static_pointer_cast<::parquet::ByteArrayColumnIndex>(column_index); + if (page_idx >= typed_index->min_values().size() || + page_idx >= typed_index->max_values().size()) { + return false; + } + const auto min = ::parquet::ByteArrayToString(typed_index->min_values()[page_idx]); + const auto max = ::parquet::ByteArrayToString(typed_index->max_values()[page_idx]); + if (!set_decoded_binary_field(column_schema, DecodedValueKind::BINARY, + StringRef(min.data(), min.size()), + &page_statistics->min_value, timezone) || + !set_decoded_binary_field(column_schema, DecodedValueKind::BINARY, + StringRef(max.data(), max.size()), + &page_statistics->max_value, timezone)) { + return false; + } + page_statistics->has_min_max = true; + return true; + } + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: { + const int type_length = column_schema.descriptor->type_length(); + if (type_length <= 0) { + return false; + } + const auto typed_index = std::static_pointer_cast<::parquet::FLBAColumnIndex>(column_index); + if (page_idx >= typed_index->min_values().size() || + page_idx >= typed_index->max_values().size()) { + return false; + } + const std::string min( + reinterpret_cast(typed_index->min_values()[page_idx].ptr), + type_length); + const std::string max( + reinterpret_cast(typed_index->max_values()[page_idx].ptr), + type_length); + if (!set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY, + StringRef(min.data(), min.size()), + &page_statistics->min_value, timezone) || + !set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY, + StringRef(max.data(), max.size()), + &page_statistics->max_value, timezone)) { + return false; + } + page_statistics->has_min_max = true; + return true; + } + default: + return false; + } +} + +bool set_page_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index, + const ParquetColumnSchema& column_schema, size_t page_idx, + ParquetColumnStatistics* page_statistics, const cctz::time_zone* timezone) { + DORIS_CHECK(column_schema.type != nullptr); + switch (column_schema.descriptor->physical_type()) { + case ::parquet::Type::BOOLEAN: + return set_page_decoded_min_max<::parquet::BooleanType>(column_index, column_schema, + page_idx, DecodedValueKind::BOOL, + page_statistics, timezone); + case ::parquet::Type::INT32: + return set_page_decoded_min_max<::parquet::Int32Type>( + column_index, column_schema, page_idx, + decoded_value_kind(column_schema.type_descriptor), page_statistics, timezone); + case ::parquet::Type::INT64: + return set_page_decoded_min_max<::parquet::Int64Type>( + column_index, column_schema, page_idx, + decoded_value_kind(column_schema.type_descriptor), page_statistics, timezone); + case ::parquet::Type::FLOAT: + return set_page_decoded_min_max<::parquet::FloatType>(column_index, column_schema, page_idx, + DecodedValueKind::FLOAT, + page_statistics, timezone); + case ::parquet::Type::DOUBLE: + return set_page_decoded_min_max<::parquet::DoubleType>(column_index, column_schema, + page_idx, DecodedValueKind::DOUBLE, + page_statistics, timezone); + case ::parquet::Type::BYTE_ARRAY: + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: + return set_page_string_min_max(column_index, column_schema, page_idx, page_statistics, + timezone); + default: + return false; + } +} + +bool build_page_statistics(const std::shared_ptr<::parquet::ColumnIndex>& column_index, + const ParquetColumnSchema& column_schema, size_t page_idx, + ParquetColumnStatistics* page_statistics, + const cctz::time_zone* timezone) { + DORIS_CHECK(page_statistics != nullptr); + *page_statistics = ParquetColumnStatistics {}; + + const auto& null_pages = column_index->null_pages(); + if (!column_index->has_null_counts() || page_idx >= null_pages.size() || + page_idx >= column_index->null_counts().size()) { + return false; + } + + page_statistics->has_null_count = true; + page_statistics->has_null = column_index->null_counts()[page_idx] > 0; + page_statistics->has_not_null = !null_pages[page_idx]; + if (!page_statistics->has_not_null) { + return true; + } + return set_page_min_max(column_index, column_schema, page_idx, page_statistics, timezone); +} + +std::vector intersect_ranges(const std::vector& left, + const std::vector& right) { + std::vector result; + size_t left_idx = 0; + size_t right_idx = 0; + while (left_idx < left.size() && right_idx < right.size()) { + const int64_t left_start = left[left_idx].start; + const int64_t left_end = left_start + left[left_idx].length; + const int64_t right_start = right[right_idx].start; + const int64_t right_end = right_start + right[right_idx].length; + const int64_t start = std::max(left_start, right_start); + const int64_t end = std::min(left_end, right_end); + if (start < end) { + result.push_back(RowRange {start, end - start}); + } + if (left_end < right_end) { + ++left_idx; + } else { + ++right_idx; + } + } + return result; +} + +int64_t count_range_rows(const std::vector& ranges) { + int64_t rows = 0; + for (const auto& range : ranges) { + rows += range.length; + } + return rows; +} + +RowRange page_row_range(const ::parquet::OffsetIndex& offset_index, size_t page_idx, + int64_t row_group_rows) { + const auto& page_locations = offset_index.page_locations(); + const int64_t start = page_locations[page_idx].first_row_index; + const int64_t end = page_idx + 1 == page_locations.size() + ? row_group_rows + : page_locations[page_idx + 1].first_row_index; + DORIS_CHECK(start >= 0); + DORIS_CHECK(end >= start); + DORIS_CHECK(end <= row_group_rows); + return RowRange {start, end - start}; +} + +void append_row_range(const RowRange& range, std::vector* ranges) { + if (range.length == 0) { + return; + } + if (!ranges->empty()) { + auto& previous = ranges->back(); + if (previous.start + previous.length == range.start) { + previous.length += range.length; + return; + } + } + ranges->push_back(range); +} + +bool select_ranges_for_filter(const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group, + const std::vector>& file_schema, + const format::FileColumnPredicateFilter& column_filter, + int64_t row_group_rows, std::vector* ranges, + const cctz::time_zone* timezone) { + if (column_filter.predicates.empty()) { + return false; + } + const auto* column_schema = resolve_predicate_leaf_schema(file_schema, column_filter); + if (column_schema == nullptr || column_schema->descriptor == nullptr) { + return false; + } + + std::shared_ptr<::parquet::ColumnIndex> column_index; + std::shared_ptr<::parquet::OffsetIndex> offset_index; + try { + column_index = row_group->GetColumnIndex(column_schema->leaf_column_id); + offset_index = row_group->GetOffsetIndex(column_schema->leaf_column_id); + } catch (const ::parquet::ParquetException&) { + return false; + } catch (const std::exception&) { + return false; + } + if (column_index == nullptr || offset_index == nullptr || + column_index->null_pages().size() != offset_index->page_locations().size()) { + return false; + } + + ranges->clear(); + const auto page_count = offset_index->page_locations().size(); + for (size_t page_idx = 0; page_idx < page_count; ++page_idx) { + ParquetColumnStatistics page_statistics; + if (!build_page_statistics(column_index, *column_schema, page_idx, &page_statistics, + timezone)) { + ranges->clear(); + return false; + } + const RowRange row_range = page_row_range(*offset_index, page_idx, row_group_rows); + if (check_statistics(column_filter, page_statistics)) { + continue; + } + append_row_range(row_range, ranges); + } + return true; +} + +bool ranges_intersect(const std::vector& ranges, const RowRange& range) { + const int64_t range_end = range.start + range.length; + for (const auto& selected_range : ranges) { + const int64_t selected_end = selected_range.start + selected_range.length; + if (selected_end <= range.start) { + continue; + } + if (selected_range.start >= range_end) { + return false; + } + return true; + } + return false; +} + +void collect_leaf_schemas(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::vector* leaf_schemas) { + if (column_schema.kind == ParquetColumnSchemaKind::PRIMITIVE) { + leaf_schemas->push_back(&column_schema); + return; + } + for (const auto& child_schema : column_schema.children) { + if (!format::is_child_projected(projection, child_schema->local_id)) { + continue; + } + const auto* child_projection = + format::find_child_projection(projection, child_schema->local_id); + collect_leaf_schemas(*child_schema, child_projection, leaf_schemas); + } +} + +void collect_request_leaf_schemas( + const std::vector>& file_schema, + const format::FileScanRequest& request, + std::vector* leaf_schemas) { + std::set seen_leaf_ids; + auto collect_projection = [&](const format::LocalColumnIndex& projection) { + const int32_t local_id = projection.local_id(); + if (local_id < 0 || local_id >= static_cast(file_schema.size())) { + return; + } + std::vector projection_leaf_schemas; + collect_leaf_schemas(*file_schema[local_id], &projection, &projection_leaf_schemas); + for (const auto* leaf_schema : projection_leaf_schemas) { + DORIS_CHECK(leaf_schema != nullptr); + if (seen_leaf_ids.insert(leaf_schema->leaf_column_id).second) { + leaf_schemas->push_back(leaf_schema); + } + } + }; + for (const auto& projection : request.predicate_columns) { + collect_projection(projection); + } + for (const auto& projection : request.non_predicate_columns) { + collect_projection(projection); + } + for (const auto& column_filter : request.column_predicate_filters) { + const auto* leaf_schema = resolve_predicate_leaf_schema(file_schema, column_filter); + if (leaf_schema == nullptr) { + continue; + } + if (seen_leaf_ids.insert(leaf_schema->leaf_column_id).second) { + leaf_schemas->push_back(leaf_schema); + } + } +} + +bool build_page_skip_plan_for_leaf( + const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group, + const ParquetColumnSchema& column_schema, const std::vector& selected_ranges, + int64_t row_group_rows, ParquetPageSkipPlan* page_skip_plan) { + DORIS_CHECK(page_skip_plan != nullptr); + *page_skip_plan = ParquetPageSkipPlan {}; + // OffsetIndex first_row_index is row-based only for non-repeated leaves. LIST/MAP/repeated + // leaves need repetition-level-aware range mapping and are intentionally left out for now. + if (column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE || + column_schema.descriptor == nullptr || column_schema.leaf_column_id < 0 || + column_schema.descriptor->max_repetition_level() != 0) { + return false; + } + + std::shared_ptr<::parquet::OffsetIndex> offset_index; + try { + offset_index = row_group->GetOffsetIndex(column_schema.leaf_column_id); + } catch (const ::parquet::ParquetException&) { + return false; + } catch (const std::exception&) { + return false; + } + if (offset_index == nullptr) { + return false; + } + + const auto page_count = offset_index->page_locations().size(); + page_skip_plan->leaf_column_id = column_schema.leaf_column_id; + page_skip_plan->skipped_pages.resize(page_count); + page_skip_plan->skipped_page_compressed_sizes.resize(page_count); + const auto& page_locations = offset_index->page_locations(); + for (size_t page_idx = 0; page_idx < page_count; ++page_idx) { + const RowRange row_range = page_row_range(*offset_index, page_idx, row_group_rows); + if (row_range.length == 0 || ranges_intersect(selected_ranges, row_range)) { + continue; + } + page_skip_plan->skipped_pages[page_idx] = 1; + page_skip_plan->skipped_page_compressed_sizes[page_idx] = + page_locations[page_idx].compressed_page_size; + append_row_range(row_range, &page_skip_plan->skipped_ranges); + } + if (page_skip_plan->empty()) { + *page_skip_plan = ParquetPageSkipPlan {}; + return false; + } + return true; +} + +void build_page_skip_plans(const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group, + const std::vector>& file_schema, + const format::FileScanRequest& request, + const std::vector& selected_ranges, int64_t row_group_rows, + std::map* page_skip_plans) { + DORIS_CHECK(page_skip_plans != nullptr); + page_skip_plans->clear(); + std::vector leaf_schemas; + collect_request_leaf_schemas(file_schema, request, &leaf_schemas); + for (const auto* leaf_schema : leaf_schemas) { + DORIS_CHECK(leaf_schema != nullptr); + ParquetPageSkipPlan page_skip_plan; + if (build_page_skip_plan_for_leaf(row_group, *leaf_schema, selected_ranges, row_group_rows, + &page_skip_plan)) { + page_skip_plans->emplace(page_skip_plan.leaf_column_id, std::move(page_skip_plan)); + } + } +} + +} // namespace + +Status select_row_group_ranges_by_page_index( + ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, int row_group_idx, int64_t row_group_rows, + std::vector* selected_ranges, std::map* page_skip_plans, + ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) { + int64_t page_index_filter_time_sink = 0; + SCOPED_RAW_TIMER(pruning_stats == nullptr ? &page_index_filter_time_sink + : &pruning_stats->page_index_filter_time); + DORIS_CHECK(selected_ranges != nullptr); + selected_ranges->clear(); + if (page_skip_plans != nullptr) { + page_skip_plans->clear(); + } + if (row_group_rows <= 0) { + return Status::OK(); + } + selected_ranges->push_back(RowRange {0, row_group_rows}); + if (!config::enable_parquet_page_index || request.column_predicate_filters.empty() || + file_reader == nullptr) { + return Status::OK(); + } + + std::shared_ptr<::parquet::PageIndexReader> page_index_reader; + std::shared_ptr<::parquet::RowGroupPageIndexReader> row_group_index_reader; + try { + if (pruning_stats != nullptr) { + ++pruning_stats->page_index_read_calls; + } + { + int64_t read_page_index_time_sink = 0; + SCOPED_RAW_TIMER(pruning_stats == nullptr ? &read_page_index_time_sink + : &pruning_stats->read_page_index_time); + page_index_reader = file_reader->GetPageIndexReader(); + if (page_index_reader == nullptr) { + return Status::OK(); + } + row_group_index_reader = page_index_reader->RowGroup(row_group_idx); + } + } catch (const ::parquet::ParquetException&) { + return Status::OK(); + } catch (const std::exception&) { + return Status::OK(); + } + if (row_group_index_reader == nullptr) { + return Status::OK(); + } + + for (const auto& column_filter : request.column_predicate_filters) { + std::vector filter_ranges; + if (!select_ranges_for_filter(row_group_index_reader, file_schema, column_filter, + row_group_rows, &filter_ranges, timezone)) { + continue; + } + *selected_ranges = intersect_ranges(*selected_ranges, filter_ranges); + if (selected_ranges->empty()) { + if (page_skip_plans != nullptr) { + page_skip_plans->clear(); + } + if (pruning_stats != nullptr) { + pruning_stats->filtered_page_rows += row_group_rows; + ++pruning_stats->filtered_row_groups_by_page_index; + } + return Status::OK(); + } + } + if (page_skip_plans != nullptr) { + build_page_skip_plans(row_group_index_reader, file_schema, request, *selected_ranges, + row_group_rows, page_skip_plans); + } + if (pruning_stats != nullptr) { + const int64_t selected_rows = count_range_rows(*selected_ranges); + DORIS_CHECK(selected_rows <= row_group_rows); + pruning_stats->filtered_page_rows += row_group_rows - selected_rows; + } + return Status::OK(); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_statistics.h b/be/src/format_v2/parquet/parquet_statistics.h new file mode 100644 index 00000000000000..3d4b9d3579185d --- /dev/null +++ b/be/src/format_v2/parquet/parquet_statistics.h @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "common/status.h" +#include "core/field.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/selection_vector.h" + +namespace parquet { +class BloomFilter; +class FileMetaData; +class ParquetFileReader; +class Statistics; +} // namespace parquet + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { +class ColumnPredicate; +} // namespace doris + +namespace doris::format::parquet { + +struct ParquetColumnSchema; + +// ============================================================================ +// ============================================================================ + +struct ParquetPruningStats { + int64_t total_row_groups = 0; // total row groups in the file + int64_t selected_row_groups = 0; // row groups selected after pruning + int64_t filtered_row_groups_by_statistics = 0; // row groups pruned by min/max statistics + int64_t filtered_row_groups_by_dictionary = 0; // row groups pruned by dictionary + int64_t filtered_row_groups_by_bloom_filter = 0; // row groups pruned by bloom filter + int64_t filtered_row_groups_by_page_index = 0; // row groups fully pruned by page index + int64_t filtered_group_rows = 0; // rows in pruned row groups + int64_t filtered_page_rows = 0; // rows pruned by page index + int64_t selected_row_ranges = 0; // selected row range count + int64_t page_index_read_calls = 0; // Page Index read count + int64_t bloom_filter_read_time = 0; // Bloom filter read time (ns) + int64_t row_group_filter_time = 0; // row-group pruning time (ns) + int64_t page_index_filter_time = 0; // page-index pruning time (ns) + int64_t read_page_index_time = 0; // page-index read time (ns) +}; + +struct ParquetColumnStatistics { + Field min_value; // column minimum value converted to Doris type + Field max_value; // column maximum value + bool has_null = false; // whether NULL exists + bool has_not_null = false; // whether non-NULL values exist + bool has_null_count = false; // whether null_count is valid + bool has_min_max = false; // whether min/max is valid after conversion + + bool has_any_statistics() const { return has_null_count || has_min_max; } +}; + +// ============================================================================ +// ============================================================================ +// statistics(TransformColumnStatistics + check_statistics) +// -> dictionary(read_dictionary_words + predicate::evaluate_and) +// -> bloom filter(bloom_filter_prune_reason) +// ============================================================================ +struct ParquetStatisticsUtils { + static ParquetColumnStatistics TransformColumnStatistics( + const ParquetColumnSchema& column_schema, + const std::shared_ptr<::parquet::Statistics>& statistics, + const cctz::time_zone* timezone = nullptr); + + static bool BloomFilterExcludes(const ParquetColumnSchema& column_schema, + const format::FileColumnPredicateFilter& column_filter, + const ::parquet::BloomFilter& bloom_filter); +}; + +Status select_row_groups_by_statistics( + const ::parquet::FileMetaData& metadata, ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, const std::vector* candidate_row_groups, + std::vector* selected_row_groups, bool enable_bloom_filter, + ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone = nullptr); + +Status select_row_group_ranges_by_page_index( + ::parquet::ParquetFileReader* file_reader, + const std::vector>& file_schema, + const format::FileScanRequest& request, int row_group_idx, int64_t row_group_rows, + std::vector* selected_ranges, std::map* page_skip_plans, + ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone = nullptr); + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_type.cpp b/be/src/format_v2/parquet/parquet_type.cpp new file mode 100644 index 00000000000000..d35181d0397178 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_type.cpp @@ -0,0 +1,358 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_type.h" + +#include + +#include +#include + +#include "core/data_type/data_type_factory.hpp" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/primitive_type.h" + +namespace doris::format::parquet { +namespace { + +DataTypePtr create_type(PrimitiveType type, bool nullable, int precision = 0, int scale = 0) { + return DataTypeFactory::instance().create_data_type(type, nullable, precision, scale); +} + +PrimitiveType decimal_primitive_type(int precision) { + return precision > 38 ? TYPE_DECIMAL256 : TYPE_DECIMAL128I; +} + +void mark_decimal(const ::parquet::ColumnDescriptor* column, int precision, int scale, + ParquetTypeDescriptor* result) { + result->is_decimal = true; + result->decimal_precision = precision; + result->decimal_scale = scale; + switch (column->physical_type()) { + case ::parquet::Type::INT32: + result->extra_type_info = ParquetExtraTypeInfo::DECIMAL_INT32; + break; + case ::parquet::Type::INT64: + result->extra_type_info = ParquetExtraTypeInfo::DECIMAL_INT64; + break; + case ::parquet::Type::BYTE_ARRAY: + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: + result->extra_type_info = ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY; + break; + default: + result->extra_type_info = ParquetExtraTypeInfo::NONE; + break; + } +} + +void mark_integer(int bit_width, bool is_signed, ParquetTypeDescriptor* result) { + result->integer_bit_width = bit_width; + result->is_unsigned_integer = !is_signed; +} + +DataTypePtr converted_type_to_doris_type(const ::parquet::ColumnDescriptor* column, + ParquetTypeDescriptor* result) { + const bool nullable = column->max_definition_level() > 0; + switch (column->converted_type()) { + case ::parquet::ConvertedType::UTF8: + case ::parquet::ConvertedType::ENUM: + case ::parquet::ConvertedType::JSON: + case ::parquet::ConvertedType::BSON: + return create_type(TYPE_STRING, nullable); + case ::parquet::ConvertedType::DECIMAL: + mark_decimal(column, column->type_precision(), column->type_scale(), result); + return create_type(decimal_primitive_type(column->type_precision()), nullable, + column->type_precision(), column->type_scale()); + case ::parquet::ConvertedType::DATE: + return create_type(TYPE_DATEV2, nullable); + case ::parquet::ConvertedType::TIME_MILLIS: + result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported"; + return nullptr; + case ::parquet::ConvertedType::TIME_MICROS: + result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported"; + return nullptr; + case ::parquet::ConvertedType::TIMESTAMP_MILLIS: + result->is_timestamp = true; + result->timestamp_is_adjusted_to_utc = true; + result->time_unit = ParquetTimeUnit::MILLIS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS; + return create_type(TYPE_DATETIMEV2, nullable, 0, 3); + case ::parquet::ConvertedType::TIMESTAMP_MICROS: + result->is_timestamp = true; + result->timestamp_is_adjusted_to_utc = true; + result->time_unit = ParquetTimeUnit::MICROS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS; + return create_type(TYPE_DATETIMEV2, nullable, 0, 6); + // Parquet stores signed and unsigned integer logical annotations on signed physical carriers: + // INT_8/UINT_8/INT_16/UINT_16/INT_32/UINT_32 use physical INT32, and + // INT_64/UINT_64 use physical INT64. Doris maps unsigned integers to the next wider + // signed type so all values in the unsigned range can be represented. + case ::parquet::ConvertedType::INT_8: + mark_integer(8, true, result); + return create_type(TYPE_TINYINT, nullable); + case ::parquet::ConvertedType::UINT_8: + mark_integer(8, false, result); + return create_type(TYPE_SMALLINT, nullable); + case ::parquet::ConvertedType::INT_16: + mark_integer(16, true, result); + return create_type(TYPE_SMALLINT, nullable); + case ::parquet::ConvertedType::UINT_16: + mark_integer(16, false, result); + return create_type(TYPE_INT, nullable); + case ::parquet::ConvertedType::INT_32: + mark_integer(32, true, result); + return create_type(TYPE_INT, nullable); + case ::parquet::ConvertedType::UINT_32: + mark_integer(32, false, result); + return create_type(TYPE_BIGINT, nullable); + case ::parquet::ConvertedType::INT_64: + mark_integer(64, true, result); + return create_type(TYPE_BIGINT, nullable); + case ::parquet::ConvertedType::UINT_64: + mark_integer(64, false, result); + return create_type(TYPE_LARGEINT, nullable); + case ::parquet::ConvertedType::NONE: + default: + return nullptr; + } +} + +DataTypePtr logical_type_to_doris_type(const ::parquet::ColumnDescriptor* column, + ParquetTypeDescriptor* result) { + const auto& logical_type = column->logical_type(); + if (logical_type == nullptr || !logical_type->is_valid() || logical_type->is_none()) { + return nullptr; + } + const bool nullable = column->max_definition_level() > 0; + if (logical_type->is_string() || logical_type->is_enum() || logical_type->is_JSON() || + logical_type->is_BSON() || logical_type->is_UUID()) { + return create_type(TYPE_STRING, nullable); + } + if (logical_type->is_decimal()) { + const auto& decimal_type = static_cast(*logical_type); + mark_decimal(column, decimal_type.precision(), decimal_type.scale(), result); + return create_type(decimal_primitive_type(decimal_type.precision()), nullable, + decimal_type.precision(), decimal_type.scale()); + } + if (logical_type->is_date()) { + return create_type(TYPE_DATEV2, nullable); + } + if (logical_type->is_time()) { + const auto& time_type = static_cast(*logical_type); + if (time_type.is_adjusted_to_utc()) { + result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported"; + return nullptr; + } + int scale = 0; + if (time_type.time_unit() == ::parquet::LogicalType::TimeUnit::MILLIS) { + scale = 3; + result->time_unit = ParquetTimeUnit::MILLIS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS; + } else if (time_type.time_unit() == ::parquet::LogicalType::TimeUnit::MICROS) { + scale = 6; + result->time_unit = ParquetTimeUnit::MICROS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS; + } else { + return nullptr; + } + return create_type(TYPE_TIMEV2, nullable, 0, scale); + } + if (logical_type->is_timestamp()) { + const auto& timestamp_type = + static_cast(*logical_type); + int scale = 0; + if (timestamp_type.time_unit() == ::parquet::LogicalType::TimeUnit::MILLIS) { + scale = 3; + result->time_unit = ParquetTimeUnit::MILLIS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS; + } else if (timestamp_type.time_unit() == ::parquet::LogicalType::TimeUnit::MICROS) { + scale = 6; + result->time_unit = ParquetTimeUnit::MICROS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS; + } else if (timestamp_type.time_unit() == ::parquet::LogicalType::TimeUnit::NANOS) { + scale = 6; + result->time_unit = ParquetTimeUnit::NANOS; + result->extra_type_info = ParquetExtraTypeInfo::UNIT_NS; + } else { + return nullptr; + } + result->is_timestamp = true; + result->timestamp_is_adjusted_to_utc = timestamp_type.is_adjusted_to_utc(); + return create_type(TYPE_DATETIMEV2, nullable, 0, scale); + } + if (logical_type->is_int()) { + const auto& int_type = static_cast(*logical_type); + mark_integer(int_type.bit_width(), int_type.is_signed(), result); + switch (int_type.bit_width()) { + case 8: + return create_type(int_type.is_signed() ? TYPE_TINYINT : TYPE_SMALLINT, nullable); + case 16: + return create_type(int_type.is_signed() ? TYPE_SMALLINT : TYPE_INT, nullable); + case 32: + return create_type(int_type.is_signed() ? TYPE_INT : TYPE_BIGINT, nullable); + case 64: + return create_type(int_type.is_signed() ? TYPE_BIGINT : TYPE_LARGEINT, nullable); + default: + return nullptr; + } + } + if (logical_type->is_float16()) { + if (column->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY || + column->type_length() != 2) { + return nullptr; + } + result->extra_type_info = ParquetExtraTypeInfo::FLOAT16; + return create_type(TYPE_FLOAT, nullable); + } + return nullptr; +} + +DataTypePtr physical_type_to_doris_type(const ::parquet::ColumnDescriptor* column) { + const bool nullable = column->max_definition_level() > 0; + DataTypePtr type; + switch (column->physical_type()) { + case ::parquet::Type::BOOLEAN: + type = std::make_shared(); + break; + case ::parquet::Type::INT32: + type = std::make_shared(); + break; + case ::parquet::Type::INT64: + type = std::make_shared(); + break; + case ::parquet::Type::FLOAT: + type = std::make_shared(); + break; + case ::parquet::Type::DOUBLE: + type = std::make_shared(); + break; + case ::parquet::Type::BYTE_ARRAY: + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: + type = std::make_shared(); + break; + case ::parquet::Type::INT96: + type = create_type(TYPE_DATETIMEV2, nullable, 0, 6); + break; + default: + return nullptr; + } + return nullable ? make_nullable(type) : type; +} + +bool record_reader_physical_type_supported(::parquet::Type::type physical_type) { + switch (physical_type) { + case ::parquet::Type::BOOLEAN: + case ::parquet::Type::INT32: + case ::parquet::Type::INT64: + case ::parquet::Type::INT96: + case ::parquet::Type::FLOAT: + case ::parquet::Type::DOUBLE: + case ::parquet::Type::BYTE_ARRAY: + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: + return true; + default: + return false; + } +} + +} // namespace + +std::string parquet_column_name(const ::parquet::ColumnDescriptor* column) { + if (column == nullptr) { + return {}; + } + auto path = column->path(); + if (path) { + return path->ToDotString(); + } + return column->name(); +} + +ParquetTypeDescriptor resolve_parquet_type(const ::parquet::ColumnDescriptor* column) { + ParquetTypeDescriptor result; + if (column == nullptr) { + return result; + } + + result.physical_type = column->physical_type(); + result.converted_type = column->converted_type(); + result.fixed_length = column->type_length(); + + if (auto logical_type = logical_type_to_doris_type(column, &result); logical_type != nullptr) { + result.doris_type = logical_type; + } else if (!result.unsupported_reason.empty()) { + result.doris_type = nullptr; + result.supports_record_reader = false; + } else if (auto converted_type = converted_type_to_doris_type(column, &result); + converted_type != nullptr) { + result.doris_type = converted_type; + } else if (!result.unsupported_reason.empty()) { + result.doris_type = nullptr; + result.supports_record_reader = false; + } else { + result.doris_type = physical_type_to_doris_type(column); + if (result.physical_type == ::parquet::Type::INT96) { + result.extra_type_info = ParquetExtraTypeInfo::IMPALA_TIMESTAMP; + } + } + + result.is_string_like = !result.is_decimal && + result.extra_type_info != ParquetExtraTypeInfo::FLOAT16 && + (result.physical_type == ::parquet::Type::BYTE_ARRAY || + result.physical_type == ::parquet::Type::FIXED_LEN_BYTE_ARRAY); + + if (!record_reader_physical_type_supported(result.physical_type)) { + result.supports_record_reader = false; + } + return result; +} + +bool supports_record_reader(const ParquetTypeDescriptor& type_descriptor) { + return type_descriptor.supports_record_reader; +} + +DecodedValueKind decoded_value_kind(const ParquetTypeDescriptor& type_descriptor) { + switch (type_descriptor.physical_type) { + case ::parquet::Type::BOOLEAN: + return DecodedValueKind::BOOL; + case ::parquet::Type::INT32: + if (type_descriptor.is_unsigned_integer && type_descriptor.integer_bit_width == 32) { + return DecodedValueKind::UINT32; + } + return DecodedValueKind::INT32; + case ::parquet::Type::INT64: + if (type_descriptor.is_unsigned_integer && type_descriptor.integer_bit_width == 64) { + return DecodedValueKind::UINT64; + } + return DecodedValueKind::INT64; + case ::parquet::Type::INT96: + return DecodedValueKind::INT96; + case ::parquet::Type::FLOAT: + return DecodedValueKind::FLOAT; + case ::parquet::Type::DOUBLE: + return DecodedValueKind::DOUBLE; + case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: + return DecodedValueKind::FIXED_BINARY; + case ::parquet::Type::BYTE_ARRAY: + default: + return DecodedValueKind::BINARY; + } +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/parquet_type.h b/be/src/format_v2/parquet/parquet_type.h new file mode 100644 index 00000000000000..5d21aae6bae092 --- /dev/null +++ b/be/src/format_v2/parquet/parquet_type.h @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include + +#include "core/data_type/data_type.h" +#include "core/data_type_serde/decoded_column_view.h" + +namespace parquet { +class ColumnDescriptor; +} // namespace parquet + +namespace doris::format::parquet { + +// ============================================================================ +// ============================================================================ + +enum class ParquetExtraTypeInfo { + NONE, // no special encoding; read by physical type + DECIMAL_INT32, // decimal stored as a 4-byte big-endian int + DECIMAL_INT64, // decimal stored as an 8-byte big-endian int + DECIMAL_BYTE_ARRAY, // decimal stored as a variable/fixed-length big-endian byte array + UNIT_MS, // time unit is milliseconds + UNIT_MICROS, // time unit is microseconds + UNIT_NS, // time unit is nanoseconds + IMPALA_TIMESTAMP, // Impala-compatible timestamp encoded as INT96 + FLOAT16, // half-precision float (FIXED_LEN_BYTE_ARRAY(2) -> Float32) +}; + +enum class ParquetTimeUnit { + UNKNOWN, + MILLIS, + MICROS, + NANOS, +}; + +// ============================================================================ +// ============================================================================ +struct ParquetTypeDescriptor { + DataTypePtr doris_type; + ParquetExtraTypeInfo extra_type_info = ParquetExtraTypeInfo::NONE; + ParquetTimeUnit time_unit = ParquetTimeUnit::UNKNOWN; + ::parquet::Type::type physical_type = ::parquet::Type::UNDEFINED; + ::parquet::ConvertedType::type converted_type = ::parquet::ConvertedType::UNDEFINED; + int integer_bit_width = -1; // bit width for INT_8/16/32/64 + int decimal_precision = -1; // precision for DECIMAL(p,s) + int decimal_scale = -1; // scale for DECIMAL(p,s) + int fixed_length = -1; // fixed length for FIXED_LEN_BYTE_ARRAY + bool is_unsigned_integer = false; // whether the integer is unsigned (UINT_8/16/32/64) + bool is_decimal = false; // whether this is a decimal type + bool is_timestamp = false; // whether this is a timestamp type + bool timestamp_is_adjusted_to_utc = false; // whether the timestamp is UTC-normalized + bool is_string_like = false; // binary type that is neither decimal nor FLOAT16 + bool supports_record_reader = true; // whether Arrow RecordReader can read this type + std::string unsupported_reason; // non-empty when this Parquet logical type is unsupported +}; + +std::string parquet_column_name(const ::parquet::ColumnDescriptor* column); + +ParquetTypeDescriptor resolve_parquet_type(const ::parquet::ColumnDescriptor* column); + +bool supports_record_reader(const ParquetTypeDescriptor& type_descriptor); + +DecodedValueKind decoded_value_kind(const ParquetTypeDescriptor& type_descriptor); + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/column_reader.cpp b/be/src/format_v2/parquet/reader/column_reader.cpp new file mode 100644 index 00000000000000..9b7577e5521ea8 --- /dev/null +++ b/be/src/format_v2/parquet/reader/column_reader.cpp @@ -0,0 +1,625 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/column_reader.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_struct.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/reader/global_rowid_column_reader.h" +#include "format_v2/parquet/reader/list_column_reader.h" +#include "format_v2/parquet/reader/map_column_reader.h" +#include "format_v2/parquet/reader/row_position_column_reader.h" +#include "format_v2/parquet/reader/scalar_column_reader.h" +#include "format_v2/parquet/reader/struct_column_reader.h" +#include "runtime/runtime_profile.h" + +namespace doris::format::parquet { +namespace { + +class DataPageSkipFilter { +public: + DataPageSkipFilter(const ParquetPageSkipPlan* page_skip_plan, + ParquetPageSkipProfile page_skip_profile) + : _page_skip_plan(page_skip_plan), _page_skip_profile(page_skip_profile) { + DORIS_CHECK(_page_skip_plan != nullptr); + } + + bool operator()(const ::parquet::DataPageStats&) { + // Arrow invokes this callback once for each DATA_PAGE/DATA_PAGE_V2 and never for + // dictionary pages, so this ordinal matches Parquet OffsetIndex page locations. + const size_t page_idx = _next_data_page_idx++; + const bool skip = _page_skip_plan->should_skip_page(page_idx); + if (!skip) { + return false; + } + update_skip_profile(page_idx); + return true; + } + +private: + void update_skip_profile(size_t page_idx) const { + if (_page_skip_profile.skipped_pages != nullptr) { + COUNTER_UPDATE(_page_skip_profile.skipped_pages, 1); + } + if (_page_skip_profile.skipped_bytes != nullptr) { + COUNTER_UPDATE(_page_skip_profile.skipped_bytes, + _page_skip_plan->skipped_page_compressed_size(page_idx)); + } + } + + const ParquetPageSkipPlan* _page_skip_plan = nullptr; + ParquetPageSkipProfile _page_skip_profile; + size_t _next_data_page_idx = 0; +}; + +const ParquetPageSkipPlan* find_page_skip_plan( + const std::map* page_skip_plans, int leaf_column_id) { + if (page_skip_plans == nullptr) { + return nullptr; + } + const auto plan_it = page_skip_plans->find(leaf_column_id); + return plan_it == page_skip_plans->end() ? nullptr : &plan_it->second; +} + +void install_data_page_filter(std::unique_ptr<::parquet::PageReader>& page_reader, + const std::map* page_skip_plans, + int leaf_column_id, ParquetPageSkipProfile page_skip_profile) { + DORIS_CHECK(page_reader != nullptr); + const ParquetPageSkipPlan* page_skip_plan = + find_page_skip_plan(page_skip_plans, leaf_column_id); + if (page_skip_plan == nullptr) { + return; + } + page_reader->set_data_page_filter(DataPageSkipFilter(page_skip_plan, page_skip_profile)); +} + +bool supports_nested_scalar_record_reader(const ParquetColumnSchema& column_schema) { + if (column_schema.type_descriptor.supports_record_reader) { + return true; + } + const auto& type_descriptor = column_schema.type_descriptor; + if ((type_descriptor.extra_type_info != ParquetExtraTypeInfo::NONE && + type_descriptor.extra_type_info != ParquetExtraTypeInfo::FLOAT16) || + type_descriptor.is_decimal || type_descriptor.is_timestamp || + type_descriptor.is_string_like) { + return false; + } + if (type_descriptor.converted_type != ::parquet::ConvertedType::NONE && + type_descriptor.converted_type != ::parquet::ConvertedType::UNDEFINED) { + return false; + } + switch (type_descriptor.physical_type) { + case ::parquet::Type::BOOLEAN: + case ::parquet::Type::INT32: + case ::parquet::Type::INT64: + case ::parquet::Type::FLOAT: + case ::parquet::Type::DOUBLE: + return true; + default: + return false; + } + return true; +} + +} // namespace + +Status ParquetColumnReader::skip(int64_t rows) { + return Status::NotSupported("Parquet column skip is not implemented, rows={}", rows); +} + +void ParquetColumnReader::advance_nested_build_level_cursor_past_parent( + int16_t parent_repetition_level) { + int64_t child_cursor = nested_build_level_cursor(); + const auto& child_rep_levels = nested_repetition_levels(); + const int64_t child_levels_written = nested_levels_written(); + while (child_cursor < child_levels_written) { + const int16_t child_rep_level = child_rep_levels[child_cursor]; + ++child_cursor; + if (!is_or_has_repeated_child() || child_rep_level <= parent_repetition_level) { + break; + } + } + set_nested_build_level_cursor(child_cursor); +} + +void ParquetColumnReader::update_reader_read_rows(int64_t rows) const { + if (_profile.reader_read_rows != nullptr) { + COUNTER_UPDATE(_profile.reader_read_rows, rows); + } +} + +void ParquetColumnReader::update_reader_skip_rows(int64_t rows) const { + if (_profile.reader_skip_rows != nullptr) { + COUNTER_UPDATE(_profile.reader_skip_rows, rows); + } +} + +Status ParquetColumnReader::select(const SelectionVector& sel, uint16_t selected_rows, + int64_t batch_rows, MutableColumnPtr& column) { + if (column.get() == nullptr) { + return Status::InvalidArgument("Parquet selected read result is null for column {}", + name()); + } + RETURN_IF_ERROR(sel.verify(selected_rows, batch_rows)); + + const auto ranges = selection_to_ranges(sel, selected_rows); + int64_t cursor = 0; + for (const auto& range : ranges) { + if (range.start < cursor || range.start + range.length > batch_rows) { + return Status::InvalidArgument("Invalid parquet selection range [{}, {}) for column {}", + range.start, range.start + range.length, name()); + } + RETURN_IF_ERROR(skip(range.start - cursor)); + + int64_t range_rows_read = 0; + RETURN_IF_ERROR(read(range.length, column, &range_rows_read)); + if (range_rows_read != range.length) { + return Status::Corruption( + "Parquet selected read returned {} rows, expected {} rows for column {}", + range_rows_read, range.length, name()); + } + cursor = range.start + range.length; + } + RETURN_IF_ERROR(skip(batch_rows - cursor)); + if (_profile.reader_select_rows != nullptr) { + COUNTER_UPDATE(_profile.reader_select_rows, selected_rows); + } + return Status::OK(); +} + +ParquetColumnReaderFactory::ParquetColumnReaderFactory( + std::shared_ptr<::parquet::RowGroupReader> row_group, int num_leaf_columns, + const std::map* page_skip_plans, + ParquetPageSkipProfile page_skip_profile, const cctz::time_zone* timezone, + bool enable_strict_mode, ParquetColumnReaderProfile column_reader_profile) + : _row_group(std::move(row_group)), + _record_readers(static_cast(num_leaf_columns)), + _page_skip_plans(page_skip_plans), + _page_skip_profile(page_skip_profile), + _timezone(timezone), + _enable_strict_mode(enable_strict_mode), + _column_reader_profile(column_reader_profile) {} + +std::unique_ptr ParquetColumnReaderFactory::create_row_position_column_reader( + int64_t row_group_first_row) const { + return std::make_unique(row_group_first_row, _column_reader_profile); +} + +std::unique_ptr ParquetColumnReaderFactory::create_global_rowid_column_reader( + const format::GlobalRowIdContext& context, int64_t row_group_first_row) const { + return std::make_unique(context, row_group_first_row, + _column_reader_profile); +} + +Status ParquetColumnReaderFactory::make_scalar_column_reader( + const ParquetColumnSchema& column_schema, + std::shared_ptr<::parquet::internal::RecordReader> record_reader, bool use_page_skip_plan, + std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + const auto* page_skip_plan = + use_page_skip_plan ? find_page_skip_plan(_page_skip_plans, column_schema.leaf_column_id) + : nullptr; + *reader = std::make_unique(column_schema, std::move(record_reader), + page_skip_plan, _timezone, _enable_strict_mode, + _column_reader_profile); + return Status::OK(); +} + +Status ParquetColumnReaderFactory::create_scalar_column_reader( + const ParquetColumnSchema& column_schema, bool is_nested, + std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + if (!column_schema.type_descriptor.unsupported_reason.empty()) { + return Status::NotSupported("Unsupported parquet column '{}': {}", column_schema.name, + column_schema.type_descriptor.unsupported_reason); + } + if (is_nested && column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE) { + return Status::InvalidArgument("Parquet nested scalar reader requires primitive column {}", + column_schema.name); + } + if (column_schema.leaf_column_id < 0 || + column_schema.leaf_column_id >= static_cast(_record_readers.size())) { + return Status::InvalidArgument("Invalid parquet leaf column id {} for column {}", + column_schema.leaf_column_id, column_schema.name); + } + if (column_schema.descriptor == nullptr) { + return Status::InvalidArgument("Parquet column descriptor is null for column {}", + column_schema.name); + } + if (!is_nested && (column_schema.descriptor->max_repetition_level() != 0 || + column_schema.descriptor->max_definition_level() > 1)) { + return Status::NotSupported( + "Current parquet scalar reader only supports flat primitive columns; column {} is " + "not supported", + column_schema.name); + } + if (is_nested && !supports_nested_scalar_record_reader(column_schema)) { + return Status::NotSupported( + "Current parquet nested scalar reader does not support column {}", + column_schema.name); + } + if (!is_nested && !column_schema.type_descriptor.supports_record_reader) { + return Status::NotSupported("Current parquet scalar reader does not support column {}", + column_schema.name); + } + std::shared_ptr<::parquet::internal::RecordReader> record_reader; + // Nested readers implement skip() by materializing rows into a scratch column. If Arrow + // page filtering is also installed, those scratch reads can consume the next selected row + // after a page-index range gap. Keep page filtering on flat scalar readers only. + RETURN_IF_ERROR(get_record_reader(column_schema.leaf_column_id, column_schema.descriptor, + column_schema.name, !is_nested, &record_reader)); + return make_scalar_column_reader(column_schema, std::move(record_reader), !is_nested, reader); +} + +// 1. RowGroupReader::GetColumnPageReader(leaf_column_id) -> Arrow PageReader +Status ParquetColumnReaderFactory::get_record_reader( + int leaf_column_id, const ::parquet::ColumnDescriptor* descriptor, const std::string& name, + bool install_page_filter, + std::shared_ptr<::parquet::internal::RecordReader>* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + if (_row_group == nullptr) { + return Status::InternalError("Parquet row group reader is not initialized for column {}", + name); + } + if (leaf_column_id < 0 || leaf_column_id >= static_cast(_record_readers.size())) { + return Status::InvalidArgument("Invalid parquet leaf column id {} for column {}", + leaf_column_id, name); + } + if (descriptor == nullptr) { + return Status::InvalidArgument("Parquet column descriptor is null for column {}", name); + } + if (_record_readers[leaf_column_id] == nullptr) { + try { + auto page_reader = _row_group->GetColumnPageReader(leaf_column_id); + if (install_page_filter) { + install_data_page_filter(page_reader, _page_skip_plans, leaf_column_id, + _page_skip_profile); + } + const auto level_info = ::parquet::internal::LevelInfo::ComputeLevelInfo(descriptor); + _record_readers[leaf_column_id] = ::parquet::internal::RecordReader::Make( + descriptor, level_info, ::arrow::default_memory_pool(), + /*read_dictionary=*/false, + /*read_dense_for_nullable=*/false); + _record_readers[leaf_column_id]->SetPageReader(std::move(page_reader)); + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to create parquet record reader for column {}: {}", + name, e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to create parquet record reader for column {}: {}", + name, e.what()); + } + } + if (_record_readers[leaf_column_id] == nullptr) { + return Status::Corruption("Failed to create parquet record reader for column {}", name); + } + *reader = _record_readers[leaf_column_id]; + return Status::OK(); +} + +Status ParquetColumnReaderFactory::create_struct_column_reader( + const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + std::vector> child_readers; + child_readers.reserve(column_schema.children.size()); + std::vector child_output_indices; + child_output_indices.reserve(column_schema.children.size()); + DataTypes projected_child_types; + Strings projected_child_names; + for (size_t child_idx = 0; child_idx < column_schema.children.size(); ++child_idx) { + const auto& child_schema = column_schema.children[child_idx]; + const auto* child_projection = + format::find_child_projection(projection, child_schema->local_id); + if (!format::is_child_projected(projection, child_schema->local_id)) { + continue; + } + std::unique_ptr child_reader; + RETURN_IF_ERROR(create_column_reader(*child_schema, child_projection, true, &child_reader)); + child_output_indices.push_back(static_cast(projected_child_types.size())); + projected_child_types.push_back(make_nullable(child_reader->type())); + projected_child_names.push_back(child_reader->name()); + child_readers.push_back(std::move(child_reader)); + } + if (format::is_partial_projection(projection) && + projected_child_types.size() != projection->children.size()) { + return Status::InvalidArgument( + "Parquet STRUCT projection for column {} contains invalid child", + column_schema.name); + } + if (projected_child_types.empty() && !column_schema.children.empty()) { + return Status::NotSupported("Parquet STRUCT projection for column {} contains no children", + column_schema.name); + } + DataTypePtr type = column_schema.type; + if (format::is_partial_projection(projection)) { + type = std::make_shared(projected_child_types, projected_child_names); + if (column_schema.type != nullptr && column_schema.type->is_nullable()) { + type = make_nullable(type); + } + } + *reader = std::make_unique( + column_schema, std::move(type), std::move(child_readers), + std::move(child_output_indices), _column_reader_profile); + return Status::OK(); +} + +Status ParquetColumnReaderFactory::create_list_column_reader( + const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + if (column_schema.children.size() != 1) { + return Status::NotSupported("Unsupported parquet LIST layout for column {}", + column_schema.name); + } + std::unique_ptr element_reader; + const auto& element_schema = *column_schema.children[0]; + const auto* element_projection = + format::find_child_projection(projection, element_schema.local_id); + if (format::is_partial_projection(projection) && element_projection == nullptr) { + return Status::NotSupported("Parquet LIST projection for column {} contains no element", + column_schema.name); + } + RETURN_IF_ERROR( + create_column_reader(element_schema, element_projection, true, &element_reader)); + DataTypePtr type = column_schema.type; + if (format::is_partial_projection(element_projection)) { + type = std::make_shared(element_reader->type()); + if (column_schema.type != nullptr && column_schema.type->is_nullable()) { + type = make_nullable(type); + } + } + *reader = std::make_unique(column_schema, std::move(type), + std::move(element_reader), _column_reader_profile); + return Status::OK(); +} + +Status ParquetColumnReaderFactory::create_map_column_reader( + const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + if (column_schema.children.size() != 2) { + return Status::NotSupported("Unsupported parquet MAP layout for column {}", + column_schema.name); + } + const auto& key_schema = *column_schema.children[0]; + const auto& value_schema = *column_schema.children[1]; + const auto* value_projection = format::find_child_projection(projection, value_schema.local_id); + if (format::is_partial_projection(projection)) { + if (value_projection == nullptr) { + return Status::NotSupported("Parquet MAP projection for column {} contains no value", + column_schema.name); + } + for (const auto& child_projection : projection->children) { + if (child_projection.local_id() == key_schema.local_id) { + continue; + } + if (child_projection.local_id() != value_schema.local_id) { + return Status::InvalidArgument( + "Parquet MAP projection for column {} contains invalid child", + column_schema.name); + } + } + } + std::unique_ptr key_reader; + // MAP materialization always needs the full key stream. It owns entry existence, offsets and + // key equality semantics, so MAP projection is defined only as value-subtree pruning. + RETURN_IF_ERROR(create_column_reader(key_schema, nullptr, true, &key_reader)); + std::unique_ptr value_reader; + RETURN_IF_ERROR(create_column_reader(value_schema, value_projection, true, &value_reader)); + DataTypePtr type = column_schema.type; + if (format::is_partial_projection(value_projection)) { + type = std::make_shared(make_nullable(key_reader->type()), + make_nullable(value_reader->type())); + if (column_schema.type != nullptr && column_schema.type->is_nullable()) { + type = make_nullable(type); + } + } + *reader = + std::make_unique(column_schema, std::move(type), std::move(key_reader), + std::move(value_reader), _column_reader_profile); + return Status::OK(); +} + +Status ParquetColumnReaderFactory::create(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const { + return create_column_reader(column_schema, projection, false, reader); +} + +Status ParquetColumnReaderFactory::create_count_shape_reader( + const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const { + return create_count_shape_reader_impl(column_schema, projection, false, reader); +} + +Status ParquetColumnReaderFactory::create_count_shape_reader_impl( + const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection, + bool is_nested, std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + switch (column_schema.kind) { + case ParquetColumnSchemaKind::PRIMITIVE: + if (format::is_partial_projection(projection)) { + return Status::InvalidArgument("Parquet COUNT projection is invalid for column {}", + column_schema.name); + } + return create_scalar_column_reader(column_schema, is_nested, reader); + case ParquetColumnSchemaKind::STRUCT: { + if (column_schema.children.empty()) { + return Status::NotSupported("Parquet COUNT shape reader found empty STRUCT column {}", + column_schema.name); + } + const ParquetColumnSchema* child_schema = nullptr; + const format::LocalColumnIndex* child_projection = nullptr; + if (format::is_partial_projection(projection)) { + const auto child_id = projection->children[0].local_id(); + const auto child_it = std::ranges::find_if( + column_schema.children, + [&](const auto& child) { return child->local_id == child_id; }); + if (child_it == column_schema.children.end()) { + return Status::InvalidArgument( + "Parquet COUNT projection for column {} contains invalid child", + column_schema.name); + } + child_schema = child_it->get(); + child_projection = &projection->children[0]; + } else { + child_schema = column_schema.children[0].get(); + } + DORIS_CHECK(child_schema != nullptr); + return create_count_shape_reader_impl(*child_schema, child_projection, true, reader); + } + case ParquetColumnSchemaKind::LIST: { + if (column_schema.children.size() != 1) { + return Status::NotSupported("Unsupported parquet LIST layout for COUNT column {}", + column_schema.name); + } + const auto& element_schema = *column_schema.children[0]; + const auto* element_projection = + format::find_child_projection(projection, element_schema.local_id); + return create_count_shape_reader_impl(element_schema, element_projection, true, reader); + } + case ParquetColumnSchemaKind::MAP: { + if (column_schema.children.empty()) { + return Status::NotSupported("Unsupported parquet MAP layout for COUNT column {}", + column_schema.name); + } + // The key stream defines MAP entry existence and offsets. Counting top-level MAP NULL-ness + // from it avoids creating a value reader, which is the expensive path for files with huge + // MAP value strings. + return create_count_shape_reader_impl(*column_schema.children[0], nullptr, true, reader); + } + } + return Status::NotSupported("Unsupported parquet column schema kind for COUNT column {}", + column_schema.name); +} + +Status ParquetColumnReaderFactory::create_column_reader( + const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection, + bool is_nested, std::unique_ptr* reader) const { + if (reader == nullptr) { + return Status::InvalidArgument("reader is null"); + } + switch (column_schema.kind) { + case ParquetColumnSchemaKind::PRIMITIVE: + if (is_nested) { + if (format::is_partial_projection(projection)) { + return Status::InvalidArgument("Parquet scalar projection is invalid for column {}", + column_schema.name); + } + return create_scalar_column_reader(column_schema, true, reader); + } + return create_scalar_column_reader(column_schema, false, reader); + case ParquetColumnSchemaKind::STRUCT: + return create_struct_column_reader(column_schema, projection, reader); + case ParquetColumnSchemaKind::LIST: + return create_list_column_reader(column_schema, projection, reader); + case ParquetColumnSchemaKind::MAP: + return create_map_column_reader(column_schema, projection, reader); + } + return Status::NotSupported("Unsupported parquet column schema kind for column {}", + column_schema.name); +} + +ParquetColumnReader::ParquetColumnReader(const ParquetColumnSchema& schema, const DataTypePtr type, + ParquetColumnReaderProfile profile) + : _profile(profile), + _field_id(schema.local_id), + _leaf_column_id(schema.leaf_column_id), + _nullable_definition_level(schema.nullable_definition_level), + _repeated_repetition_level(schema.repeated_repetition_level), + _definition_level(schema.definition_level), + _repetition_level(schema.repetition_level), + _repeated_ancestor_definition_level(schema.repeated_ancestor_definition_level), + _type(std::move(type)), + _name(schema.name) {} + +Status ParquetColumnReader::load_nested_batch(int64_t) { + return Status::NotSupported("Parquet nested batch load is not supported for column {}", _name); +} + +Status ParquetColumnReader::load_nested_levels_batch(int64_t) { + return Status::NotSupported("Parquet nested levels batch load is not supported for column {}", + _name); +} + +Status ParquetColumnReader::build_nested_column(int64_t, MutableColumnPtr&, int64_t*) { + return Status::NotSupported("Parquet nested column build is not supported for column {}", + _name); +} + +Status ParquetColumnReader::skip_nested_column(int64_t rows) { + auto scratch_column = _type->create_column(); + int64_t values_read = 0; + RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &values_read)); + if (values_read != rows) { + return Status::Corruption("Failed to skip nested parquet column {}: skipped {} of {} rows", + _name, values_read, rows); + } + return Status::OK(); +} + +const std::vector& ParquetColumnReader::nested_definition_levels() const { + static const std::vector empty; + return empty; +} + +const std::vector& ParquetColumnReader::nested_repetition_levels() const { + static const std::vector empty; + return empty; +} + +int64_t ParquetColumnReader::nested_levels_written() const { + return 0; +} + +bool ParquetColumnReader::is_or_has_repeated_child() const { + return _repetition_level > 0; +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/column_reader.h b/be/src/format_v2/parquet/reader/column_reader.h new file mode 100644 index 00000000000000..f439010e8830d7 --- /dev/null +++ b/be/src/format_v2/parquet/reader/column_reader.h @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type.h" +#include "format_v2/column_data.h" +#include "format_v2/parquet/parquet_profile.h" +#include "format_v2/parquet/parquet_type.h" +#include "format_v2/parquet/selection_vector.h" +#include "runtime/runtime_profile.h" + +namespace parquet { +class ColumnDescriptor; +class RowGroupReader; + +namespace internal { +class RecordReader; +} // namespace internal +} // namespace parquet + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { +class IColumn; +} // namespace doris + +namespace doris::format::parquet { +struct ParquetColumnSchema; + +class ParquetColumnReader { +public: + virtual ~ParquetColumnReader() = default; + + virtual int file_column_id() const { return _field_id; } + + virtual int parquet_leaf_column_id() const { return _leaf_column_id; } + + int16_t nullable_definition_level() const { return _nullable_definition_level; } + int16_t repeated_repetition_level() const { return _repeated_repetition_level; } + + virtual const DataTypePtr& type() const { return _type; } + virtual const std::string& name() const { return _name; } + const ParquetColumnReaderProfile& profile() const { return _profile; } + + virtual Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) = 0; + + virtual Status skip(int64_t rows); + + virtual Status select(const SelectionVector& sel, uint16_t selected_rows, int64_t batch_rows, + MutableColumnPtr& column); + + virtual Status load_nested_batch(int64_t rows); + + // Shape-only load interface for COUNT(col). Implementations only guarantee that + // nested_definition_levels(), nested_repetition_levels(), and nested_levels_written() are available; + // value_indices and values_column are not guaranteed, so callers must not call build_nested_column() afterwards. + // This protocol lets the V2 aggregation path avoid Doris-side value materialization even when + // the representative ARRAY/STRUCT leaf is STRING/BINARY; normal scans still use load_nested_batch(). + virtual Status load_nested_levels_batch(int64_t rows); + + virtual Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read); + + virtual Status skip_nested_column(int64_t rows); + + virtual const std::vector& nested_definition_levels() const; + virtual const std::vector& nested_repetition_levels() const; + virtual int64_t nested_levels_written() const; + virtual bool is_or_has_repeated_child() const; + virtual void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level); + + int64_t nested_build_level_cursor() const { return _nested_build_level_cursor; } + void set_nested_build_level_cursor(int64_t cursor) { + DORIS_CHECK(cursor >= 0); + _nested_build_level_cursor = cursor; + } + void reset_nested_build_level_cursor() { _nested_build_level_cursor = 0; } + +protected: + ParquetColumnReader(const ParquetColumnSchema& schema, const DataTypePtr type, + ParquetColumnReaderProfile profile = {}); + ParquetColumnReader() = default; + void update_reader_read_rows(int64_t rows) const; + void update_reader_skip_rows(int64_t rows) const; + + ParquetColumnReaderProfile _profile; + const int _field_id = -1; // child ordinal in the parent node + const int _leaf_column_id = -1; // Parquet physical leaf column id (-1 = non-leaf) + const int16_t _nullable_definition_level = + 0; // definition-level threshold where this node becomes nullable + const int16_t _repeated_repetition_level = + 0; // repetition level of the nearest repeated ancestor + const int16_t _definition_level = 0; // definition level accumulated to this node + const int16_t _repetition_level = 0; // repetition level accumulated to this node + const int16_t _repeated_ancestor_definition_level = + 0; // definition level of the nearest repeated ancestor + const DataTypePtr _type; // Doris target type + const std::string _name; // column name for error messages + int64_t _nested_build_level_cursor = 0; // nested build cursor (current level position) +}; + +class ParquetColumnReaderFactory { +public: + ParquetColumnReaderFactory(std::shared_ptr<::parquet::RowGroupReader> row_group, + int num_leaf_columns, + const std::map* page_skip_plans = nullptr, + ParquetPageSkipProfile page_skip_profile = {}, + const cctz::time_zone* timezone = nullptr, + bool enable_strict_mode = false, + ParquetColumnReaderProfile column_reader_profile = {}); + + Status create(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const; + + // Create a scalar reader for one representative leaf that carries the top-level column shape. + // This is used by COUNT(col): the caller needs definition/repetition levels to decide whether + // the top-level value is NULL, but must not materialize heavy payload leaves. MAP deliberately + // uses the key leaf because the key stream owns entry existence and avoids reading value pages. + Status create_count_shape_reader(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const; + + Status create(const ParquetColumnSchema& column_schema, + std::unique_ptr* reader) const { + return create(column_schema, nullptr, reader); + } + + std::unique_ptr create_row_position_column_reader( + int64_t row_group_first_row) const; + std::unique_ptr create_global_rowid_column_reader( + const format::GlobalRowIdContext& context, int64_t row_group_first_row) const; + +private: + Status create_scalar_column_reader(const ParquetColumnSchema& column_schema, bool is_nested, + std::unique_ptr* reader) const; + + Status create_struct_column_reader(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const; + + Status create_list_column_reader(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const; + + Status create_map_column_reader(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + std::unique_ptr* reader) const; + + Status create_column_reader(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, bool is_nested, + std::unique_ptr* reader) const; + Status create_count_shape_reader_impl(const ParquetColumnSchema& column_schema, + const format::LocalColumnIndex* projection, + bool is_nested, + std::unique_ptr* reader) const; + + Status get_record_reader(int leaf_column_id, const ::parquet::ColumnDescriptor* descriptor, + const std::string& name, bool install_page_filter, + std::shared_ptr<::parquet::internal::RecordReader>* reader) const; + + Status make_scalar_column_reader( + const ParquetColumnSchema& column_schema, + std::shared_ptr<::parquet::internal::RecordReader> record_reader, + bool use_page_skip_plan, std::unique_ptr* reader) const; + + std::shared_ptr<::parquet::RowGroupReader> _row_group; // Arrow RowGroup reader + mutable std::vector> + _record_readers; // RecordReader cache by leaf_column_id + const std::map* _page_skip_plans = + nullptr; // page-index pruning result + ParquetPageSkipProfile _page_skip_profile; // page skip profile + const cctz::time_zone* _timezone = nullptr; // timezone + bool _enable_strict_mode = false; // strict mode + ParquetColumnReaderProfile _column_reader_profile; // column reader profile +}; +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp b/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp new file mode 100644 index 00000000000000..82b2838ba2cbfe --- /dev/null +++ b/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/global_rowid_column_reader.h" + +#include + +#include "common/cast_set.h" +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/column/column_string.h" +#include "core/data_type/data_type_string.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "storage/utils.h" + +namespace doris::format::parquet { + +GlobalRowIdColumnReader::GlobalRowIdColumnReader(format::GlobalRowIdContext context, + int64_t row_group_first_row, + ParquetColumnReaderProfile profile) + : ParquetColumnReader(ParquetColumnSchema {.name = BeConsts::GLOBAL_ROWID_COL}, + std::make_shared(), profile), + _context(context), + _row_group_first_row(row_group_first_row) {} + +int GlobalRowIdColumnReader::file_column_id() const { + return format::GLOBAL_ROWID_COLUMN_ID; +} + +int GlobalRowIdColumnReader::parquet_leaf_column_id() const { + return -1; +} + +const DataTypePtr& GlobalRowIdColumnReader::type() const { + return _type; +} + +const std::string& GlobalRowIdColumnReader::name() const { + return _name; +} + +Status GlobalRowIdColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) { + if (column.get() == nullptr || rows_read == nullptr) { + return Status::InvalidArgument("Invalid parquet global rowid read result pointer"); + } + if (rows < 0) { + return Status::InvalidArgument("Invalid parquet global rowid read rows {}", rows); + } + for (int64_t row = 0; row < rows; ++row) { + append_row_id(cast_set(_row_group_first_row + _next_row_position + row), column); + } + _next_row_position += rows; + *rows_read = rows; + return Status::OK(); +} + +Status GlobalRowIdColumnReader::skip(int64_t rows) { + if (rows <= 0) { + return Status::OK(); + } + _next_row_position += rows; + return Status::OK(); +} + +void GlobalRowIdColumnReader::append_row_id(uint32_t row_id, MutableColumnPtr& column) const { + auto* string_column = assert_cast(column.get()); + GlobalRowLoacationV2 location(_context.version, _context.backend_id, _context.file_id, row_id); + string_column->insert_data(reinterpret_cast(&location), + sizeof(GlobalRowLoacationV2)); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/global_rowid_column_reader.h b/be/src/format_v2/parquet/reader/global_rowid_column_reader.h new file mode 100644 index 00000000000000..b3f71645923010 --- /dev/null +++ b/be/src/format_v2/parquet/reader/global_rowid_column_reader.h @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "format_v2/column_data.h" +#include "format_v2/parquet/reader/column_reader.h" + +namespace doris::format::parquet { + +class GlobalRowIdColumnReader final : public ParquetColumnReader { +public: + GlobalRowIdColumnReader(format::GlobalRowIdContext context, int64_t row_group_first_row, + ParquetColumnReaderProfile profile = {}); + + int file_column_id() const override; + int parquet_leaf_column_id() const override; + const DataTypePtr& type() const override; + const std::string& name() const override; + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override; + Status skip(int64_t rows) override; + +private: + void append_row_id(uint32_t row_id, MutableColumnPtr& column) const; + + format::GlobalRowIdContext _context; // RowId prefix (version + backend_id + file_id) + int64_t _row_group_first_row = 0; // first file row of the current row group + int64_t _next_row_position = 0; // next row position to emit +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/list_column_reader.cpp b/be/src/format_v2/parquet/reader/list_column_reader.cpp new file mode 100644 index 00000000000000..aaf8f6635f1af0 --- /dev/null +++ b/be/src/format_v2/parquet/reader/list_column_reader.cpp @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/list_column_reader.h" + +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_nullable.h" +#include "format_v2/parquet/reader/nested_column_materializer.h" + +namespace doris::format::parquet { +namespace { + +void remove_nullable_wrapper_if_not_expected(const DataTypePtr& output_type, + MutableColumnPtr* column) { + DORIS_CHECK(column != nullptr); + if (output_type->is_nullable()) { + return; + } + if (auto* nullable_column = check_and_get_column(**column)) { + *column = nullable_column->get_nested_column_ptr(); + } +} + +} // namespace + +Status ListColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) { + RETURN_IF_ERROR(load_nested_batch(rows)); + return build_nested_column(rows, column, rows_read); +} + +Status ListColumnReader::skip(int64_t rows) { + if (rows <= 0) { + return Status::OK(); + } + auto scratch_column = _type->create_column(); + RETURN_IF_ERROR(load_nested_batch(rows)); + int64_t rows_read = 0; + RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read)); + if (rows_read != rows) { + return Status::Corruption("Failed to skip parquet LIST column {}: skipped {} of {} rows", + _name, rows_read, rows); + } + update_reader_skip_rows(rows); + return Status::OK(); +} + +Status ListColumnReader::load_nested_batch(int64_t rows) { + DORIS_CHECK(_element_reader != nullptr); + reset_nested_build_level_cursor(); + return _element_reader->load_nested_batch(rows); +} + +Status ListColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) { + if (column.get() == nullptr || values_read == nullptr) { + return Status::InvalidArgument("Invalid parquet list build result pointer for column {}", + _name); + } + DORIS_CHECK(_element_reader != nullptr); + auto* array_column = array_column_from_output(column); + DORIS_CHECK(array_column != nullptr); + auto* parent_null_map = null_map_from_nullable_output(column); + auto nested_column = array_column->get_data_ptr()->assert_mutable(); + const auto& element_output_type = + assert_cast(*remove_nullable(_type)).get_nested_type(); + remove_nullable_wrapper_if_not_expected(element_output_type, &nested_column); + + const auto& def_levels = _element_reader->nested_definition_levels(); + const auto& rep_levels = _element_reader->nested_repetition_levels(); + const int64_t levels_written = _element_reader->nested_levels_written(); + std::vector entry_counts; + NullMap parent_nulls; + *values_read = 0; + int64_t level_idx = nested_build_level_cursor(); + const int16_t min_parent_definition_level = + static_cast(_definition_level - 1 - (_type->is_nullable() ? 1 : 0)); + while (level_idx < levels_written) { + const int16_t def_level = def_levels[level_idx]; + const int16_t rep_level = rep_levels[level_idx]; + const bool starts_parent = rep_level < _repetition_level; + if (starts_parent && *values_read >= length_upper_bound) { + break; + } + ++level_idx; + if (rep_level > _repetition_level || def_level < min_parent_definition_level || + (!starts_parent && def_level < _repeated_ancestor_definition_level)) { + continue; + } + if (rep_level == _repetition_level) { + if (entry_counts.empty()) { + return Status::Corruption("Invalid repeated level for parquet LIST column {}", + _name); + } + if (def_level >= _definition_level) { + ++entry_counts.back(); + } + continue; + } + + const bool parent_is_null = def_level < _definition_level - 1; + if (parent_is_null && parent_null_map == nullptr) { + return Status::Corruption("Parquet LIST column {} contains null for non-nullable LIST", + _name); + } + parent_nulls.push_back(parent_is_null); + entry_counts.push_back(def_level >= _definition_level ? 1 : 0); + ++*values_read; + } + set_nested_build_level_cursor(level_idx); + + uint64_t total_entries = 0; + int64_t child_value_count = 0; + if (!_element_reader->is_or_has_repeated_child()) { + for (const auto entry_count : entry_counts) { + total_entries += entry_count; + } + RETURN_IF_ERROR(_element_reader->build_nested_column(static_cast(total_entries), + nested_column, &child_value_count)); + } else { + uint64_t pending_entries = 0; + auto flush_pending_entries = [&]() -> Status { + if (pending_entries == 0) { + return Status::OK(); + } + int64_t span_child_value_count = 0; + RETURN_IF_ERROR(_element_reader->build_nested_column( + static_cast(pending_entries), nested_column, &span_child_value_count)); + if (span_child_value_count != static_cast(pending_entries)) { + return Status::Corruption( + "Parquet LIST column {} built {} child values, expected {}", _name, + span_child_value_count, pending_entries); + } + child_value_count += span_child_value_count; + pending_entries = 0; + return Status::OK(); + }; + + for (const auto entry_count : entry_counts) { + total_entries += entry_count; + if (entry_count > 0) { + pending_entries += entry_count; + continue; + } + RETURN_IF_ERROR(flush_pending_entries()); + _element_reader->advance_nested_build_level_cursor_past_parent(_repetition_level); + } + RETURN_IF_ERROR(flush_pending_entries()); + } + if (child_value_count != static_cast(total_entries)) { + return Status::Corruption("Parquet LIST column {} built {} child values, expected {}", + _name, child_value_count, total_entries); + } + array_column->get_data_ptr() = std::move(nested_column); + append_offsets(array_column->get_offsets(), entry_counts); + append_parent_nulls(parent_null_map, parent_nulls); + return Status::OK(); +} + +const std::vector& ListColumnReader::nested_definition_levels() const { + DORIS_CHECK(_element_reader != nullptr); + return _element_reader->nested_definition_levels(); +} + +const std::vector& ListColumnReader::nested_repetition_levels() const { + DORIS_CHECK(_element_reader != nullptr); + return _element_reader->nested_repetition_levels(); +} + +int64_t ListColumnReader::nested_levels_written() const { + DORIS_CHECK(_element_reader != nullptr); + return _element_reader->nested_levels_written(); +} + +bool ListColumnReader::is_or_has_repeated_child() const { + return true; +} + +void ListColumnReader::advance_nested_build_level_cursor_past_parent( + int16_t parent_repetition_level) { + DORIS_CHECK(_element_reader != nullptr); + ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level); + _element_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/list_column_reader.h b/be/src/format_v2/parquet/reader/list_column_reader.h new file mode 100644 index 00000000000000..5a60eecacb0e3e --- /dev/null +++ b/be/src/format_v2/parquet/reader/list_column_reader.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/reader/column_reader.h" + +namespace doris::format::parquet { + +class ListColumnReader final : public ParquetColumnReader { +public: + ListColumnReader(const ParquetColumnSchema& schema, DataTypePtr type, + std::unique_ptr element_reader, + ParquetColumnReaderProfile profile = {}) + : ParquetColumnReader(schema, type, profile), + _element_reader(std::move(element_reader)) {} + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override; + Status skip(int64_t rows) override; + Status load_nested_batch(int64_t rows) override; + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override; + const std::vector& nested_definition_levels() const override; + const std::vector& nested_repetition_levels() const override; + int64_t nested_levels_written() const override; + bool is_or_has_repeated_child() const override; + void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override; + +private: + std::unique_ptr + _element_reader; // element reader (recursive; may be Scalar/Struct/List/Map) +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/map_column_reader.cpp b/be/src/format_v2/parquet/reader/map_column_reader.cpp new file mode 100644 index 00000000000000..90d4a867331190 --- /dev/null +++ b/be/src/format_v2/parquet/reader/map_column_reader.cpp @@ -0,0 +1,238 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/map_column_reader.h" + +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "format_v2/parquet/reader/nested_column_materializer.h" +#include "format_v2/parquet/reader/scalar_column_reader.h" + +namespace doris::format::parquet { +namespace { + +void remove_nullable_wrapper_if_not_expected(const DataTypePtr& output_type, + MutableColumnPtr* column) { + DORIS_CHECK(column != nullptr); + if (output_type->is_nullable()) { + return; + } + if (auto* nullable_column = check_and_get_column(**column)) { + *column = nullable_column->get_nested_column_ptr(); + } +} + +} // namespace + +Status MapColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) { + RETURN_IF_ERROR(load_nested_batch(rows)); + return build_nested_column(rows, column, rows_read); +} + +Status MapColumnReader::skip(int64_t rows) { + if (rows <= 0) { + return Status::OK(); + } + auto scratch_column = _type->create_column(); + RETURN_IF_ERROR(load_nested_batch(rows)); + int64_t rows_read = 0; + RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read)); + if (rows_read != rows) { + return Status::Corruption("Failed to skip parquet MAP column {}: skipped {} of {} rows", + _name, rows_read, rows); + } + update_reader_skip_rows(rows); + return Status::OK(); +} + +Status MapColumnReader::load_nested_batch(int64_t rows) { + DORIS_CHECK(_key_reader != nullptr); + DORIS_CHECK(_value_reader != nullptr); + reset_nested_build_level_cursor(); + RETURN_IF_ERROR(_key_reader->load_nested_batch(rows)); + return _value_reader->load_nested_batch(rows); +} + +Status MapColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) { + if (column.get() == nullptr || values_read == nullptr) { + return Status::InvalidArgument("Invalid parquet map build result pointer for column {}", + _name); + } + DORIS_CHECK(_key_reader != nullptr); + DORIS_CHECK(_value_reader != nullptr); + auto* map_column = map_column_from_output(column); + DORIS_CHECK(map_column != nullptr); + auto* parent_null_map = null_map_from_nullable_output(column); + auto key_column = map_column->get_keys_ptr()->assert_mutable(); + auto value_column = map_column->get_values_ptr()->assert_mutable(); + const auto& map_output_type = assert_cast(*remove_nullable(_type)); + remove_nullable_wrapper_if_not_expected(map_output_type.get_key_type(), &key_column); + remove_nullable_wrapper_if_not_expected(map_output_type.get_value_type(), &value_column); + + const auto& def_levels = _key_reader->nested_definition_levels(); + const auto& rep_levels = _key_reader->nested_repetition_levels(); + const int64_t levels_written = _key_reader->nested_levels_written(); + + std::vector entry_counts; + std::vector map_level_indices; + NullMap parent_nulls; + *values_read = 0; + int64_t level_idx = nested_build_level_cursor(); + const int16_t min_parent_definition_level = + static_cast(_definition_level - 1 - (_type->is_nullable() ? 1 : 0)); + while (level_idx < levels_written) { + const int16_t def_level = def_levels[level_idx]; + const int16_t rep_level = rep_levels[level_idx]; + const bool starts_parent = rep_level < _repetition_level; + if (starts_parent && *values_read >= length_upper_bound) { + break; + } + const int64_t current_level_idx = level_idx; + ++level_idx; + if (rep_level > _repetition_level || def_level < min_parent_definition_level || + (!starts_parent && def_level < _repeated_ancestor_definition_level)) { + continue; + } + map_level_indices.push_back(current_level_idx); + if (rep_level == _repetition_level) { + if (entry_counts.empty()) { + return Status::Corruption("Invalid repeated level for parquet MAP column {}", + _name); + } + if (def_level >= _definition_level) { + ++entry_counts.back(); + } + continue; + } + + const bool parent_is_null = def_level < _definition_level - 1; + if (parent_is_null && parent_null_map == nullptr) { + return Status::Corruption("Parquet MAP column {} contains null for non-nullable MAP", + _name); + } + parent_nulls.push_back(parent_is_null); + entry_counts.push_back(def_level >= _definition_level ? 1 : 0); + ++*values_read; + } + set_nested_build_level_cursor(level_idx); + + uint64_t total_entries = 0; + for (const auto entry_count : entry_counts) { + total_entries += entry_count; + } + const size_t key_start = key_column->size(); + int64_t key_value_count = 0; + RETURN_IF_ERROR(_key_reader->build_nested_column(static_cast(total_entries), + key_column, &key_value_count)); + if (key_value_count != static_cast(total_entries)) { + return Status::Corruption("Parquet MAP column {} built {} keys, expected {}", _name, + key_value_count, total_entries); + } + if (const auto* nullable_key_column = check_and_get_column(*key_column); + nullable_key_column != nullptr && + nullable_key_column->has_null(key_start, nullable_key_column->size())) { + return Status::Corruption("Parquet MAP column {} contains null key", _name); + } + int64_t value_count = 0; + if (auto* scalar_value_reader = dynamic_cast(_value_reader.get())) { + const auto& value_def_levels = scalar_value_reader->nested_definition_levels(); + const auto& value_rep_levels = scalar_value_reader->nested_repetition_levels(); + const int64_t value_levels_written = scalar_value_reader->nested_levels_written(); + int64_t value_level_idx = scalar_value_reader->nested_build_level_cursor(); + for (const int64_t key_level_idx : map_level_indices) { + while (value_level_idx < value_levels_written && + (value_rep_levels[value_level_idx] > _repetition_level || + value_def_levels[value_level_idx] < min_parent_definition_level || + (value_rep_levels[value_level_idx] >= _repetition_level && + value_def_levels[value_level_idx] < _repeated_ancestor_definition_level))) { + ++value_level_idx; + } + if (value_level_idx >= value_levels_written) { + return Status::Corruption( + "Parquet MAP column {} value stream ended before key stream", _name); + } + // MAP is encoded as a repeated key/value struct. The key stream owns entry existence, + // but the value stream still has one shape slot for every consumed MAP slot. Consume + // value slots in lockstep with key slots so shape-only slots from empty/null maps do + // not become scalar values. + if (value_rep_levels[value_level_idx] != rep_levels[key_level_idx]) { + return Status::Corruption( + "Parquet MAP column {} value repetition level is not aligned with key " + "stream", + _name); + } + if (def_levels[key_level_idx] >= _definition_level) { + RETURN_IF_ERROR( + scalar_value_reader->append_nested_value(value_level_idx, value_column)); + ++value_count; + } + ++value_level_idx; + } + scalar_value_reader->set_nested_build_level_cursor(value_level_idx); + } else { + // Complex MAP values own their nested shape below the entry slot, so they can recursively + // materialize exactly one child value for each MAP entry. + RETURN_IF_ERROR(_value_reader->build_nested_column(static_cast(total_entries), + value_column, &value_count)); + } + if (value_count != static_cast(total_entries)) { + return Status::Corruption("Parquet MAP column {} built {} values, expected {}", _name, + value_count, total_entries); + } + + map_column->get_keys_ptr() = std::move(key_column); + map_column->get_values_ptr() = std::move(value_column); + append_offsets(map_column->get_offsets(), entry_counts); + append_parent_nulls(parent_null_map, parent_nulls); + return Status::OK(); +} + +const std::vector& MapColumnReader::nested_definition_levels() const { + DORIS_CHECK(_key_reader != nullptr); + return _key_reader->nested_definition_levels(); +} + +const std::vector& MapColumnReader::nested_repetition_levels() const { + DORIS_CHECK(_key_reader != nullptr); + return _key_reader->nested_repetition_levels(); +} + +int64_t MapColumnReader::nested_levels_written() const { + DORIS_CHECK(_key_reader != nullptr); + return _key_reader->nested_levels_written(); +} + +bool MapColumnReader::is_or_has_repeated_child() const { + return true; +} + +void MapColumnReader::advance_nested_build_level_cursor_past_parent( + int16_t parent_repetition_level) { + DORIS_CHECK(_key_reader != nullptr); + DORIS_CHECK(_value_reader != nullptr); + ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level); + _key_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level); + _value_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/map_column_reader.h b/be/src/format_v2/parquet/reader/map_column_reader.h new file mode 100644 index 00000000000000..3e26a7a480a2a5 --- /dev/null +++ b/be/src/format_v2/parquet/reader/map_column_reader.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/reader/column_reader.h" + +namespace doris::format::parquet { + +// 2. build_nested_column() -> +class MapColumnReader final : public ParquetColumnReader { +public: + MapColumnReader(const ParquetColumnSchema& schema, DataTypePtr type, + std::unique_ptr key_reader, + std::unique_ptr value_reader, + ParquetColumnReaderProfile profile = {}) + : ParquetColumnReader(schema, type, profile), + _key_reader(std::move(key_reader)), + _value_reader(std::move(value_reader)) {} + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override; + Status skip(int64_t rows) override; + Status load_nested_batch(int64_t rows) override; + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override; + const std::vector& nested_definition_levels() const override; + const std::vector& nested_repetition_levels() const override; + int64_t nested_levels_written() const override; + bool is_or_has_repeated_child() const override; + void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override; + +private: + std::unique_ptr _key_reader; // key column reader (always read fully) + std::unique_ptr + _value_reader; // value column reader (can be pruned by projection) +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/nested_column_materializer.cpp b/be/src/format_v2/parquet/reader/nested_column_materializer.cpp new file mode 100644 index 00000000000000..e06b7eaaf317e7 --- /dev/null +++ b/be/src/format_v2/parquet/reader/nested_column_materializer.cpp @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/nested_column_materializer.h" + +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" + +namespace doris::format::parquet { + +ColumnArray* array_column_from_output(MutableColumnPtr& column) { + if (auto* nullable_column = check_and_get_column(*column)) { + return assert_cast(&nullable_column->get_nested_column()); + } + return assert_cast(column.get()); +} + +ColumnMap* map_column_from_output(MutableColumnPtr& column) { + if (auto* nullable_column = check_and_get_column(*column)) { + return assert_cast(&nullable_column->get_nested_column()); + } + return assert_cast(column.get()); +} + +ColumnStruct* struct_column_from_output(MutableColumnPtr& column) { + if (auto* nullable_column = check_and_get_column(*column)) { + return assert_cast(&nullable_column->get_nested_column()); + } + return assert_cast(column.get()); +} + +NullMap* null_map_from_nullable_output(MutableColumnPtr& column) { + if (auto* nullable_column = check_and_get_column(*column)) { + return &nullable_column->get_null_map_data(); + } + return nullptr; +} + +void append_offsets(ColumnArray::Offsets64& offsets, const std::vector& entry_counts) { + offsets.reserve(offsets.size() + entry_counts.size()); + uint64_t current_offset = offsets.empty() ? 0 : offsets.back(); + for (const auto entry_count : entry_counts) { + current_offset += entry_count; + offsets.push_back(current_offset); + } +} + +void append_parent_nulls(NullMap* dst, const NullMap& src) { + if (dst == nullptr) { + return; // target column is not nullable; no null marker is needed + } + dst->insert(src.begin(), src.end()); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/nested_column_materializer.h b/be/src/format_v2/parquet/reader/nested_column_materializer.h new file mode 100644 index 00000000000000..90fac01eb2f5e5 --- /dev/null +++ b/be/src/format_v2/parquet/reader/nested_column_materializer.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "core/column/column.h" +#include "core/column/column_array.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_struct.h" + +namespace doris::format::parquet { + +// ============================================================================ +// ============================================================================ + +ColumnArray* array_column_from_output(MutableColumnPtr& column); + +ColumnMap* map_column_from_output(MutableColumnPtr& column); + +ColumnStruct* struct_column_from_output(MutableColumnPtr& column); + +NullMap* null_map_from_nullable_output(MutableColumnPtr& column); + +// offsets[i] = offsets[i-1] + entry_counts[i]. +void append_offsets(ColumnArray::Offsets64& offsets, const std::vector& entry_counts); + +void append_parent_nulls(NullMap* dst, const NullMap& src); + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp b/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp new file mode 100644 index 00000000000000..c157ff84eef887 --- /dev/null +++ b/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp @@ -0,0 +1,728 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/parquet_leaf_reader.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/data_type/data_type_nullable.h" +#include "core/data_type_serde/decoded_column_view.h" +#include "core/string_ref.h" +#include "runtime/runtime_profile.h" +#include "util/simd/bits.h" + +namespace doris::format::parquet { +namespace { + +DecodedTimeUnit decoded_time_unit(ParquetTimeUnit time_unit) { + switch (time_unit) { + case ParquetTimeUnit::MILLIS: + return DecodedTimeUnit::MILLIS; + case ParquetTimeUnit::MICROS: + return DecodedTimeUnit::MICROS; + case ParquetTimeUnit::NANOS: + return DecodedTimeUnit::NANOS; + case ParquetTimeUnit::UNKNOWN: + default: + return DecodedTimeUnit::UNKNOWN; + } +} + +Status decoded_fixed_value_size(const std::string& column_name, DecodedValueKind value_kind, + size_t* value_size) { + switch (value_kind) { + case DecodedValueKind::BOOL: + *value_size = sizeof(bool); + return Status::OK(); + case DecodedValueKind::INT32: + *value_size = sizeof(int32_t); + return Status::OK(); + case DecodedValueKind::UINT32: + *value_size = sizeof(uint32_t); + return Status::OK(); + case DecodedValueKind::INT64: + *value_size = sizeof(int64_t); + return Status::OK(); + case DecodedValueKind::UINT64: + *value_size = sizeof(uint64_t); + return Status::OK(); + case DecodedValueKind::INT96: + *value_size = 12; + return Status::OK(); + case DecodedValueKind::FLOAT: + *value_size = sizeof(float); + return Status::OK(); + case DecodedValueKind::DOUBLE: + *value_size = sizeof(double); + return Status::OK(); + case DecodedValueKind::BINARY: + case DecodedValueKind::FIXED_BINARY: + return Status::InvalidArgument("Parquet binary value kind has no fixed value size for {}", + column_name); + } + return Status::InternalError("Unknown decoded value kind for column {}", column_name); +} + +Status get_binary_chunks(const std::string& column_name, + ::parquet::internal::RecordReader& record_reader, + std::vector>* chunks) { + auto* binary_reader = dynamic_cast<::parquet::internal::BinaryRecordReader*>(&record_reader); + if (binary_reader == nullptr) { + return Status::InternalError("Parquet binary record reader is not available for column {}", + column_name); + } + *chunks = binary_reader->GetBuilderChunks(); + return Status::OK(); +} + +Status build_binary_values(const std::string& column_name, + const std::vector>& chunks, + int64_t records_read, const NullMap* null_map, + bool read_dense_for_nullable, std::vector* binary_values) { + std::vector compact_values; + auto* values = read_dense_for_nullable ? &compact_values : binary_values; + values->reserve(records_read); + for (const auto& chunk : chunks) { + if (chunk == nullptr) { + return Status::Corruption( + "Parquet binary record reader returned null chunk for column {}", column_name); + } + if (auto* binary_array = dynamic_cast<::arrow::BinaryArray*>(chunk.get())) { + for (int64_t row_idx = 0; row_idx < binary_array->length(); ++row_idx) { + if (binary_array->IsNull(row_idx)) { + values->emplace_back(static_cast(nullptr), 0); + continue; + } + int32_t length = 0; + const uint8_t* value = binary_array->GetValue(row_idx, &length); + values->emplace_back(reinterpret_cast(value), length); + } + } else if (auto* fixed_array = dynamic_cast<::arrow::FixedSizeBinaryArray*>(chunk.get())) { + for (int64_t row_idx = 0; row_idx < fixed_array->length(); ++row_idx) { + if (fixed_array->IsNull(row_idx)) { + values->emplace_back(static_cast(nullptr), 0); + continue; + } + values->emplace_back(reinterpret_cast(fixed_array->GetValue(row_idx)), + fixed_array->byte_width()); + } + } else { + return Status::InternalError("Unexpected Arrow binary array type for column {}", + column_name); + } + } + if (read_dense_for_nullable) { + if (null_map == nullptr || null_map->size() != static_cast(records_read)) { + return Status::Corruption( + "Invalid dense nullable parquet null map for column {}: rows={}, null_map={}", + column_name, records_read, null_map == nullptr ? 0 : null_map->size()); + } + const int64_t non_null_count = static_cast(simd::count_zero_num( + reinterpret_cast(null_map->data()), null_map->size())); + if (compact_values.size() != static_cast(non_null_count)) { + return Status::Corruption( + "Invalid dense nullable parquet binary values for column {}: values={}, " + "records={}, nulls={}", + column_name, compact_values.size(), records_read, + records_read - non_null_count); + } + binary_values->reserve(records_read); + size_t value_idx = 0; + for (int64_t record_idx = 0; record_idx < records_read; ++record_idx) { + if ((*null_map)[record_idx] != 0) { + binary_values->emplace_back(static_cast(nullptr), 0); + continue; + } + binary_values->emplace_back(compact_values[value_idx++]); + } + return Status::OK(); + } + if (binary_values->size() != static_cast(records_read)) { + return Status::Corruption( + "Invalid parquet binary record read result for column {}: rows={}, records={}", + column_name, binary_values->size(), records_read); + } + return Status::OK(); +} + +float half_to_float(uint16_t value) { + const uint32_t sign = (value & 0x8000U) << 16; + const uint32_t exponent = (value & 0x7C00U) >> 10; + const uint32_t mantissa = value & 0x03FFU; + + if (exponent == 0) { + if (mantissa == 0) { + return std::bit_cast(sign); + } + const float subnormal = std::ldexp(static_cast(mantissa), -24); + return sign == 0 ? subnormal : -subnormal; + } + if (exponent == 0x1FU) { + return std::bit_cast(sign | 0x7F800000U | (mantissa << 13)); + } + return std::bit_cast(sign | ((exponent + 112U) << 23) | (mantissa << 13)); +} + +Status build_float16_values(const std::string& column_name, + const ParquetTypeDescriptor& type_descriptor, + const std::vector& binary_values, int64_t row_count, + std::vector* float_values) { + if (type_descriptor.fixed_length != 2) { + return Status::Corruption("Invalid parquet Float16 length for column {}: {}", column_name, + type_descriptor.fixed_length); + } + if (binary_values.size() != static_cast(row_count)) { + return Status::Corruption( + "Invalid parquet Float16 value count for column {}: values={}, rows={}", + column_name, binary_values.size(), row_count); + } + float_values->resize(static_cast(row_count)); + for (int64_t row = 0; row < row_count; ++row) { + const auto& binary_value = binary_values[static_cast(row)]; + if (binary_value.data == nullptr && binary_value.size == 0) { + (*float_values)[static_cast(row)] = 0; + continue; + } + if (binary_value.data == nullptr || binary_value.size != 2) { + return Status::Corruption( + "Invalid parquet Float16 value for column {} at row {}: data={}, size={}", + column_name, row, binary_value.data == nullptr ? "null" : "non-null", + binary_value.size); + } + uint16_t raw_value = 0; + std::memcpy(&raw_value, binary_value.data, sizeof(raw_value)); + (*float_values)[static_cast(row)] = half_to_float(raw_value); + } + return Status::OK(); +} + +} // namespace + +Status ParquetLeafReader::collect_batch(::parquet::internal::RecordReader& record_reader, + ParquetLeafBatch* batch) const { + DORIS_CHECK(batch != nullptr); + batch->_def_levels = nullptr; + batch->_rep_levels = nullptr; + batch->_fixed_values = nullptr; + batch->_binary_chunks.clear(); + batch->_value_kind = decoded_value_kind(_type_descriptor); + batch->_consumed_level_count = record_reader.levels_position(); + batch->_decoded_level_count = record_reader.levels_written(); + if (_descriptor->max_definition_level() > 0) { + batch->_def_levels = record_reader.def_levels(); + } + if (_descriptor->max_repetition_level() > 0) { + batch->_rep_levels = record_reader.rep_levels(); + } + batch->_read_dense_for_nullable = record_reader.read_dense_for_nullable(); + batch->_values_written = record_reader.values_written(); + + if (!batch->is_binary_value()) { + batch->_fixed_values = record_reader.values(); + return Status::OK(); + } + + RETURN_IF_ERROR(get_binary_chunks(_name, record_reader, &batch->_binary_chunks)); + batch->_values_written = 0; + for (const auto& chunk : batch->_binary_chunks) { + if (chunk == nullptr) { + return Status::Corruption( + "Parquet binary record reader returned null chunk for column {}", _name); + } + batch->_values_written += chunk->length(); + } + return Status::OK(); +} + +Status ParquetLeafReader::collect_levels_batch(::parquet::internal::RecordReader& record_reader, + ParquetLeafBatch* batch) const { + DORIS_CHECK(batch != nullptr); + batch->_def_levels = nullptr; + batch->_rep_levels = nullptr; + batch->_fixed_values = nullptr; + batch->_binary_chunks.clear(); + batch->_value_kind = decoded_value_kind(_type_descriptor); + batch->_consumed_level_count = record_reader.levels_position(); + batch->_decoded_level_count = record_reader.levels_written(); + if (_descriptor->max_definition_level() > 0) { + batch->_def_levels = record_reader.def_levels(); + } + if (_descriptor->max_repetition_level() > 0) { + batch->_rep_levels = record_reader.rep_levels(); + } + batch->_read_dense_for_nullable = record_reader.read_dense_for_nullable(); + + // Deliberately ignore values_written(), values() and BinaryRecordReader::GetBuilderChunks(). + // COUNT(col) only needs top-level shape. Pulling binary chunks transfers Arrow builder + // ownership into Doris arrays and later into ColumnString, which is exactly the OOM-prone + // materialization path for huge MAP/ARRAY/STRUCT string payloads. + batch->_values_written = 0; + return Status::OK(); +} + +// - FLOAT16: binary -> half_to_float -> float_values +Status ParquetLeafReader::append_values(const ParquetLeafBatch& batch, int64_t row_count, + const NullMap* null_map, MutableColumnPtr& column) const { + std::vector binary_values; + std::vector spaced_values; + std::vector float_values; + DecodedColumnView view; + view.value_kind = batch._value_kind; + view.time_unit = decoded_time_unit(_type_descriptor.time_unit); + view.row_count = row_count; + view.logical_integer_bit_width = _type_descriptor.integer_bit_width; + view.logical_integer_is_signed = !_type_descriptor.is_unsigned_integer; + view.decimal_precision = _type_descriptor.decimal_precision; + view.decimal_scale = _type_descriptor.decimal_scale; + view.fixed_length = _type_descriptor.fixed_length; + view.timestamp_is_adjusted_to_utc = _type_descriptor.timestamp_is_adjusted_to_utc; + view.timezone = _timezone; + view.enable_strict_mode = _enable_strict_mode; + view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data(); + const bool read_dense_for_nullable = batch._read_dense_for_nullable && view.null_map != nullptr; + + if (_type_descriptor.extra_type_info == ParquetExtraTypeInfo::FLOAT16) { + RETURN_IF_ERROR(build_binary_values(_name, batch._binary_chunks, row_count, null_map, + read_dense_for_nullable, &binary_values)); + RETURN_IF_ERROR(build_float16_values(_name, _type_descriptor, binary_values, row_count, + &float_values)); + view.value_kind = DecodedValueKind::FLOAT; + view.values = reinterpret_cast(float_values.data()); + } else if (batch.is_binary_value()) { + RETURN_IF_ERROR(build_binary_values(_name, batch._binary_chunks, row_count, null_map, + read_dense_for_nullable, &binary_values)); + view.binary_values = &binary_values; + } else if (read_dense_for_nullable) { + RETURN_IF_ERROR(build_spaced_fixed_values(batch, row_count, null_map, &spaced_values)); + view.values = spaced_values.data(); + } else { + view.values = batch._fixed_values; + } + + if (_decoded_value_appender != nullptr) { + return _decoded_value_appender(column, view); + } + + { + SCOPED_TIMER(_profile.materialization_time); + if (!_type->is_nullable()) { + if (auto* nullable_column = check_and_get_column(*column); + nullable_column != nullptr) { + auto& nested_column = nullable_column->get_nested_column(); + auto& tmp_null_map = nullable_column->get_null_map_data(); + const auto old_nested_size = nested_column.size(); + const auto old_null_map_size = tmp_null_map.size(); + auto st = _type->get_serde()->read_column_from_decoded_values(nested_column, view); + if (!st.ok()) { + nested_column.resize(old_nested_size); + return st; + } + tmp_null_map.resize(old_null_map_size + nested_column.size() - old_nested_size); + memset(tmp_null_map.data() + old_null_map_size, 0, + tmp_null_map.size() - old_null_map_size); + } else { + RETURN_IF_ERROR(_type->get_serde()->read_column_from_decoded_values(*column, view)); + } + } else { + RETURN_IF_ERROR(_type->get_serde()->read_column_from_decoded_values(*column, view)); + } + } + return Status::OK(); +} + +bool ParquetLeafBatch::is_binary_value() const { + return _value_kind == DecodedValueKind::BINARY || _value_kind == DecodedValueKind::FIXED_BINARY; +} + +Status ParquetLeafReader::build_spaced_fixed_values(const ParquetLeafBatch& batch, + int64_t row_count, const NullMap* null_map, + std::vector* spaced_values) const { + DORIS_CHECK(null_map != nullptr); + DORIS_CHECK(spaced_values != nullptr); + size_t value_size = 0; + RETURN_IF_ERROR(decoded_fixed_value_size(_name, batch._value_kind, &value_size)); + spaced_values->resize(static_cast(row_count) * value_size); + const auto non_null_count = static_cast(simd::count_zero_num( + reinterpret_cast(null_map->data()), null_map->size())); + if (batch._values_written != non_null_count) { + return Status::Corruption( + "Invalid dense nullable parquet values for column {}: values={}, records={}, " + "nulls={}", + _name, batch._values_written, row_count, row_count - non_null_count); + } + auto* dst = spaced_values->data(); + int64_t value_idx = 0; + for (int64_t record_idx = 0; record_idx < row_count; ++record_idx) { + if ((*null_map)[record_idx] != 0) { + continue; // NULL row: skip it and keep the target slot zeroed + } + std::memcpy(dst + static_cast(record_idx) * value_size, + batch._fixed_values + static_cast(value_idx) * value_size, value_size); + ++value_idx; + } + return Status::OK(); +} + +ParquetLeafReader::ParquetLeafReader( + const ::parquet::ColumnDescriptor* descriptor, ParquetTypeDescriptor type_descriptor, + DataTypePtr type, std::string name, + std::shared_ptr<::parquet::internal::RecordReader> record_reader, + ParquetColumnReaderProfile profile, const cctz::time_zone* timezone, + bool enable_strict_mode, + std::function decoded_value_appender) + : _descriptor(descriptor), + _type_descriptor(type_descriptor), + _type(std::move(type)), + _name(std::move(name)), + _record_reader(std::move(record_reader)), + _profile(profile), + _timezone(timezone), + _enable_strict_mode(enable_strict_mode), + _decoded_value_appender(std::move(decoded_value_appender)) {} + +Status ParquetLeafReader::read_batch(int64_t batch_rows, ParquetLeafBatch* batch, + int64_t* rows_read) const { + if (batch == nullptr || rows_read == nullptr) { + return Status::InvalidArgument("Invalid parquet leaf batch result pointer for column {}", + _name); + } + if (_record_reader == nullptr) { + return Status::InternalError("Parquet record reader is not initialized for column {}", + _name); + } + + try { + _record_reader->Reset(); + _record_reader->Reserve(batch_rows); + { + SCOPED_TIMER(_profile.arrow_read_records_time); + *rows_read = _record_reader->ReadRecords(batch_rows); + } + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to read parquet records for column {}: {}", _name, + e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to read parquet records for column {}: {}", _name, + e.what()); + } + if (*rows_read < 0 || *rows_read > batch_rows) { + return Status::Corruption("Invalid parquet record read result for column {}: {}", _name, + *rows_read); + } + return collect_batch(*_record_reader, batch); +} + +Status ParquetLeafReader::build_null_map(const ParquetLeafBatch& batch, int64_t records_read, + NullMap* null_map) const { + if (_descriptor->max_definition_level() == 0) { + return Status::OK(); + } + auto* def_levels = batch.def_levels(); + if (def_levels == nullptr && records_read > 0) { + return Status::Corruption( + "Parquet record reader returned null definition levels for nullable column {}", + _name); + } + const int16_t max_definition_level = _descriptor->max_definition_level(); + null_map->resize(records_read); + auto* __restrict dst = null_map->data(); + const auto* __restrict src = def_levels; + for (int64_t record_idx = 0; record_idx < records_read; ++record_idx) { + dst[record_idx] = src[record_idx] != max_definition_level; + } + return Status::OK(); +} + +Status ParquetLeafReader::read_nested_batch(int64_t batch_rows, int16_t value_slot_definition_level, + ParquetNestedScalarBatch* batch, + int16_t value_slot_repetition_level) const { + ParquetLeafBatch leaf_batch; + int64_t records_read = 0; + RETURN_IF_ERROR(read_batch(batch_rows, &leaf_batch, &records_read)); + return build_nested_batch_from_leaf_batch(leaf_batch, records_read, value_slot_definition_level, + batch, value_slot_repetition_level); +} + +Status ParquetLeafReader::read_nested_levels_batch(int64_t batch_rows, + ParquetNestedScalarBatch* batch) const { + if (batch == nullptr) { + return Status::InvalidArgument("Nested scalar levels batch is null for column {}", _name); + } + if (_record_reader == nullptr) { + return Status::InternalError("Parquet record reader is not initialized for column {}", + _name); + } + + int64_t records_read = 0; + ParquetLeafBatch leaf_batch; + try { + _record_reader->Reset(); + _record_reader->Reserve(batch_rows); + { + SCOPED_TIMER(_profile.arrow_read_records_time); + records_read = _record_reader->ReadRecords(batch_rows); + } + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to read parquet levels for column {}: {}", _name, + e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to read parquet levels for column {}: {}", _name, + e.what()); + } + if (records_read < 0 || records_read > batch_rows) { + return Status::Corruption("Invalid parquet level read result for column {}: {}", _name, + records_read); + } + RETURN_IF_ERROR(collect_levels_batch(*_record_reader, &leaf_batch)); + return build_nested_levels_batch_from_leaf_batch(leaf_batch, records_read, batch); +} + +Status ParquetLeafReader::build_nested_batch_from_leaf_batch( + const ParquetLeafBatch& leaf_batch, int64_t records_read, + int16_t value_slot_definition_level, ParquetNestedScalarBatch* batch, + int16_t value_slot_repetition_level) const { + if (batch == nullptr) { + return Status::InvalidArgument("Nested scalar batch is null for column {}", _name); + } + *batch = ParquetNestedScalarBatch(); + batch->value_slot_definition_level = value_slot_definition_level; + batch->value_slot_repetition_level = value_slot_repetition_level; + + batch->records_read = records_read; + if (_type->is_nullable() && leaf_batch.read_dense_for_nullable()) { + return Status::NotSupported( + "Dense nullable parquet nested reader is not supported for column {}", _name); + } + batch->levels_written = leaf_batch.consumed_level_count(); + const int64_t values_written = leaf_batch.values_written(); + if (batch->levels_written > leaf_batch.decoded_level_count()) { + return Status::Corruption( + "Invalid nested parquet level position for column {}: position={}, levels={}", + _name, batch->levels_written, leaf_batch.decoded_level_count()); + } + if (batch->levels_written == 0 && batch->records_read > 0 && + values_written == batch->records_read && _descriptor->max_definition_level() == 0 && + _descriptor->max_repetition_level() == 0) { + batch->levels_written = batch->records_read; + } + if (batch->levels_written < batch->records_read || values_written < 0 || + values_written > batch->levels_written) { + return Status::Corruption( + "Invalid nested parquet read result for column {}: rows={}, levels={}, values={}", + _name, batch->records_read, batch->levels_written, values_written); + } + if (batch->levels_written == 0) { + return Status::OK(); + } + + auto* def_levels = leaf_batch.def_levels(); + if (def_levels == nullptr && _descriptor->max_definition_level() > 0) { + return Status::Corruption( + "Nested parquet reader returned null definition levels for column {}", _name); + } + batch->def_levels.resize(static_cast(batch->levels_written)); + if (_descriptor->max_definition_level() == 0 || def_levels == nullptr) { + std::fill(batch->def_levels.begin(), batch->def_levels.end(), + _descriptor->max_definition_level()); + } else { + std::copy(def_levels, def_levels + batch->levels_written, batch->def_levels.begin()); + } + + auto* rep_levels = leaf_batch.rep_levels(); + if (rep_levels == nullptr && _descriptor->max_repetition_level() > 0) { + return Status::Corruption( + "Nested parquet reader returned null repetition levels for column {}", _name); + } + batch->rep_levels.resize(static_cast(batch->levels_written)); + if (_descriptor->max_repetition_level() == 0 || rep_levels == nullptr) { + std::fill(batch->rep_levels.begin(), batch->rep_levels.end(), 0); + } else { + std::copy(rep_levels, rep_levels + batch->levels_written, batch->rep_levels.begin()); + } + + const int16_t leaf_definition_level = _descriptor->max_definition_level(); + // Arrow's RecordReader may emit value placeholders for null ancestors that are below the + // Doris materialization threshold. Those slots must still advance the payload value index; + // otherwise the next defined child level points at the placeholder instead of its real value. + auto count_value_slots = [&](int16_t slot_definition_level) { + int64_t slot_count = 0; + for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) { + if (batch->def_levels[level_idx] >= slot_definition_level && + batch->rep_levels[level_idx] <= value_slot_repetition_level) { + ++slot_count; + } + } + return slot_count; + }; + + const int64_t value_slot_count = count_value_slots(value_slot_definition_level); + int16_t payload_slot_definition_level = value_slot_definition_level; + int64_t payload_value_slot_count = value_slot_count; + while (payload_slot_definition_level > 0 && payload_value_slot_count < values_written) { + --payload_slot_definition_level; + payload_value_slot_count = count_value_slots(payload_slot_definition_level); + } + + int64_t leaf_value_count = 0; + for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) { + if (batch->def_levels[level_idx] < value_slot_definition_level || + batch->rep_levels[level_idx] > value_slot_repetition_level) { + continue; + } + if (batch->def_levels[level_idx] == leaf_definition_level) { + ++leaf_value_count; + } + } + + enum class ValueLayout { LEVELS, VALUE_SLOTS, LEAF_VALUES, PAYLOAD_VALUE_SLOTS }; + ValueLayout value_layout = ValueLayout::LEAF_VALUES; + if (values_written == batch->levels_written) { + value_layout = ValueLayout::LEVELS; + } else if (values_written == value_slot_count) { + value_layout = ValueLayout::VALUE_SLOTS; + } else if (values_written == leaf_value_count) { + value_layout = ValueLayout::LEAF_VALUES; + } else if (values_written == payload_value_slot_count) { + value_layout = ValueLayout::PAYLOAD_VALUE_SLOTS; + } else { + return Status::Corruption( + "Nested parquet reader returned inconsistent value count for column {}: values={}, " + "levels={}, slots={}, leaf_values={}, payload_slots={}, " + "payload_slot_definition_level={}", + _name, values_written, batch->levels_written, value_slot_count, leaf_value_count, + payload_value_slot_count, payload_slot_definition_level); + } + + batch->value_indices.resize(static_cast(batch->levels_written), -1); + NullMap value_nulls(static_cast(values_written), 1); + int64_t value_idx = 0; + const int16_t decoded_slot_definition_level = value_layout == ValueLayout::PAYLOAD_VALUE_SLOTS + ? payload_slot_definition_level + : value_slot_definition_level; + for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) { + if (batch->def_levels[level_idx] < decoded_slot_definition_level || + batch->rep_levels[level_idx] > value_slot_repetition_level) { + continue; + } + const bool has_leaf_value = batch->def_levels[level_idx] == leaf_definition_level; + int64_t decoded_value_idx = -1; + if (value_layout == ValueLayout::LEVELS) { + decoded_value_idx = level_idx; + } else if (value_layout == ValueLayout::VALUE_SLOTS) { + decoded_value_idx = value_idx++; + } else if (value_layout == ValueLayout::PAYLOAD_VALUE_SLOTS) { + decoded_value_idx = value_idx++; + } else { + if (!has_leaf_value) { + continue; + } + decoded_value_idx = value_idx++; + } + DORIS_CHECK(decoded_value_idx >= 0); + DORIS_CHECK(decoded_value_idx < values_written); + if (has_leaf_value) { + batch->value_indices[static_cast(level_idx)] = decoded_value_idx; + value_nulls[static_cast(decoded_value_idx)] = 0; + } + } + if (value_layout != ValueLayout::LEVELS && value_idx != values_written) { + return Status::Corruption( + "Nested parquet reader value cursor stopped early for column {}: values={}, " + "visited={}", + _name, values_written, value_idx); + } + + const auto value_type = remove_nullable(_type); + batch->values_column = value_type->create_column(); + if (values_written > 0) { + ParquetLeafReader value_reader(_descriptor, _type_descriptor, value_type, _name, + _record_reader, _profile, _timezone, _enable_strict_mode); + RETURN_IF_ERROR(value_reader.append_values(leaf_batch, values_written, &value_nulls, + batch->values_column)); + } + return Status::OK(); +} + +Status ParquetLeafReader::build_nested_levels_batch_from_leaf_batch( + const ParquetLeafBatch& leaf_batch, int64_t records_read, + ParquetNestedScalarBatch* batch) const { + if (batch == nullptr) { + return Status::InvalidArgument("Nested scalar levels batch is null for column {}", _name); + } + *batch = ParquetNestedScalarBatch(); + batch->records_read = records_read; + batch->levels_written = leaf_batch.consumed_level_count(); + if (batch->levels_written > leaf_batch.decoded_level_count()) { + return Status::Corruption( + "Invalid nested parquet level position for column {}: position={}, levels={}", + _name, batch->levels_written, leaf_batch.decoded_level_count()); + } + + // Required flat leaves do not have physical def/rep level buffers. Synthesize one level slot + // per top-level row so the COUNT(col) aggregation code can use the same shape loop. + if (batch->levels_written == 0 && batch->records_read > 0 && + _descriptor->max_definition_level() == 0 && _descriptor->max_repetition_level() == 0) { + batch->levels_written = batch->records_read; + } + if (batch->levels_written < batch->records_read) { + return Status::Corruption( + "Invalid nested parquet levels result for column {}: rows={}, levels={}", _name, + batch->records_read, batch->levels_written); + } + if (batch->levels_written == 0) { + return Status::OK(); + } + + auto* def_levels = leaf_batch.def_levels(); + if (def_levels == nullptr && _descriptor->max_definition_level() > 0) { + return Status::Corruption( + "Nested parquet reader returned null definition levels for column {}", _name); + } + batch->def_levels.resize(static_cast(batch->levels_written)); + if (_descriptor->max_definition_level() == 0 || def_levels == nullptr) { + std::fill(batch->def_levels.begin(), batch->def_levels.end(), + _descriptor->max_definition_level()); + } else { + std::copy(def_levels, def_levels + batch->levels_written, batch->def_levels.begin()); + } + + auto* rep_levels = leaf_batch.rep_levels(); + if (rep_levels == nullptr && _descriptor->max_repetition_level() > 0) { + return Status::Corruption( + "Nested parquet reader returned null repetition levels for column {}", _name); + } + batch->rep_levels.resize(static_cast(batch->levels_written)); + if (_descriptor->max_repetition_level() == 0 || rep_levels == nullptr) { + std::fill(batch->rep_levels.begin(), batch->rep_levels.end(), 0); + } else { + std::copy(rep_levels, rep_levels + batch->levels_written, batch->rep_levels.begin()); + } + return Status::OK(); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/parquet_leaf_reader.h b/be/src/format_v2/parquet/reader/parquet_leaf_reader.h new file mode 100644 index 00000000000000..73b0a75e019dbd --- /dev/null +++ b/be/src/format_v2/parquet/reader/parquet_leaf_reader.h @@ -0,0 +1,168 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/column/column.h" +#include "core/column/column_nullable.h" +#include "core/data_type_serde/decoded_column_view.h" +#include "format_v2/parquet/parquet_profile.h" +#include "format_v2/parquet/parquet_type.h" + +namespace parquet { +class ColumnDescriptor; + +namespace internal { +class RecordReader; +} // namespace internal +} // namespace parquet + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace arrow { +class Array; +} // namespace arrow + +namespace doris::format::parquet { + +struct ParquetLeafReaderTestAccess; + +// Read result for a nested scalar leaf, separating Dremel-encoded shape from actual values. +// The COUNT(col) aggregation fast path consumes only records_read, levels_written, def_levels, and rep_levels. +// That path does not populate value_indices or values_column, so callers must not call build_nested_column() afterwards. +struct ParquetNestedScalarBatch { + int64_t records_read = 0; + int64_t levels_written = 0; + int16_t value_slot_definition_level = 0; + int16_t value_slot_repetition_level = std::numeric_limits::max(); + std::vector def_levels; + std::vector rep_levels; + std::vector value_indices; + MutableColumnPtr values_column; + + bool empty() const { return levels_written == 0; } +}; + +class ParquetLeafBatch { +public: + int64_t consumed_level_count() const { return _consumed_level_count; } + int64_t decoded_level_count() const { return _decoded_level_count; } + int64_t values_written() const { return _values_written; } + bool read_dense_for_nullable() const { return _read_dense_for_nullable; } + const int16_t* def_levels() const { return _def_levels; } + const int16_t* rep_levels() const { return _rep_levels; } + +private: + friend class ParquetLeafReader; + + bool is_binary_value() const; + + DecodedValueKind _value_kind = DecodedValueKind::INT32; + int64_t _consumed_level_count = 0; + int64_t _decoded_level_count = 0; + int64_t _values_written = 0; + const int16_t* _def_levels = nullptr; + const int16_t* _rep_levels = nullptr; + const uint8_t* _fixed_values = nullptr; + bool _read_dense_for_nullable = false; + std::vector> _binary_chunks; +}; + +// read_batch() -> build_null_map() + append_values() +// read_nested_batch() +class ParquetLeafReader { +public: + ParquetLeafReader(const ::parquet::ColumnDescriptor* descriptor, + ParquetTypeDescriptor type_descriptor, DataTypePtr type, std::string name, + std::shared_ptr<::parquet::internal::RecordReader> record_reader, + ParquetColumnReaderProfile profile = {}, + const cctz::time_zone* timezone = nullptr, bool enable_strict_mode = false, + std::function + decoded_value_appender = nullptr); + + Status read_batch(int64_t batch_rows, ParquetLeafBatch* batch, int64_t* rows_read) const; + + Status build_null_map(const ParquetLeafBatch& batch, int64_t records_read, + NullMap* null_map) const; + + Status append_values(const ParquetLeafBatch& batch, int64_t row_count, const NullMap* null_map, + MutableColumnPtr& column) const; + + // LEVELS / VALUE_SLOTS / LEAF_VALUES / PAYLOAD_VALUE_SLOTS. + Status read_nested_batch( + int64_t batch_rows, int16_t value_slot_definition_level, + ParquetNestedScalarBatch* batch, + int16_t value_slot_repetition_level = std::numeric_limits::max()) const; + + // COUNT(col) shape-only read path. It still calls Arrow RecordReader::ReadRecords() + // to advance the Parquet cursor and obtain def/rep levels, but Doris only copies levels: + // - it does not call BinaryRecordReader::GetBuilderChunks() + // - it does not build value_indices or values_column + // - it does not enter DataTypeSerde::read_column_from_decoded_values() + // This lets COUNT(col) on MAP/ARRAY/STRUCT evaluate top-level NULL state while avoiding + // materializing representative leaf STRING/BINARY payloads into Doris Columns. Arrow RecordReader + // does not expose a public levels-only API, so ReadRecords may still perform required page decoding; + // this API guarantees that the V2 reader does not take ownership of or copy value payloads. + Status read_nested_levels_batch(int64_t batch_rows, ParquetNestedScalarBatch* batch) const; + +private: + friend struct ParquetLeafReaderTestAccess; + + Status collect_batch(::parquet::internal::RecordReader& record_reader, + ParquetLeafBatch* batch) const; + + // Levels-only variant of collect_batch(). It snapshots only def/rep level state and does not take + // binary chunks or expose fixed-width value buffers. Used by the COUNT(col) aggregation fast path. + Status collect_levels_batch(::parquet::internal::RecordReader& record_reader, + ParquetLeafBatch* batch) const; + + Status build_spaced_fixed_values(const ParquetLeafBatch& batch, int64_t row_count, + const NullMap* null_map, + std::vector* spaced_values) const; + + Status build_nested_batch_from_leaf_batch(const ParquetLeafBatch& leaf_batch, + int64_t records_read, + int16_t value_slot_definition_level, + ParquetNestedScalarBatch* batch, + int16_t value_slot_repetition_level) const; + Status build_nested_levels_batch_from_leaf_batch(const ParquetLeafBatch& leaf_batch, + int64_t records_read, + ParquetNestedScalarBatch* batch) const; + + const ::parquet::ColumnDescriptor* _descriptor = + nullptr; // Arrow column descriptor (physical_type, max_dl, max_rl) + ParquetTypeDescriptor + _type_descriptor; // type encoding information (decimal precision, timestamp unit, etc.) + DataTypePtr _type; // Doris target type + std::string _name; // column name for error messages + std::shared_ptr<::parquet::internal::RecordReader> + _record_reader; // Arrow physical column reader (shared ownership) + ParquetColumnReaderProfile _profile; // profile counters + const cctz::time_zone* _timezone = nullptr; // timezone for timestamp conversion + bool _enable_strict_mode = false; // strict mode for type mismatch errors + std::function _decoded_value_appender; +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/row_position_column_reader.cpp b/be/src/format_v2/parquet/reader/row_position_column_reader.cpp new file mode 100644 index 00000000000000..4e9a363b13c7cb --- /dev/null +++ b/be/src/format_v2/parquet/reader/row_position_column_reader.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/row_position_column_reader.h" + +#include + +#include "core/assert_cast.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "format_v2/parquet/parquet_column_schema.h" + +namespace doris::format::parquet { + +RowPositionColumnReader::RowPositionColumnReader(int64_t row_group_first_row, + ParquetColumnReaderProfile profile) + : ParquetColumnReader(ParquetColumnSchema {.name = format::ROW_POSITION_COLUMN_NAME}, + std::make_shared(), profile), + _row_group_first_row(row_group_first_row) {} + +int RowPositionColumnReader::file_column_id() const { + return format::ROW_POSITION_COLUMN_ID; +} + +int RowPositionColumnReader::parquet_leaf_column_id() const { + return -1; +} + +const DataTypePtr& RowPositionColumnReader::type() const { + return _type; +} + +const std::string& RowPositionColumnReader::name() const { + return _name; +} + +Status RowPositionColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) { + if (column.get() == nullptr || rows_read == nullptr) { + return Status::InvalidArgument("Invalid parquet row position read result pointer"); + } + if (rows < 0) { + return Status::InvalidArgument("Invalid parquet row position read rows {}", rows); + } + auto* vector_column = assert_cast(column.get()); + auto& data = vector_column->get_data(); + const auto old_size = data.size(); + data.resize(old_size + rows); + for (int64_t row = 0; row < rows; ++row) { + data[old_size + row] = _row_group_first_row + _next_row_position + row; + } + _next_row_position += rows; + *rows_read = rows; + return Status::OK(); +} + +Status RowPositionColumnReader::skip(int64_t rows) { + if (rows <= 0) { + return Status::OK(); + } + _next_row_position += rows; + return Status::OK(); +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/row_position_column_reader.h b/be/src/format_v2/parquet/reader/row_position_column_reader.h new file mode 100644 index 00000000000000..934100317ec4fd --- /dev/null +++ b/be/src/format_v2/parquet/reader/row_position_column_reader.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "format_v2/parquet/reader/column_reader.h" + +namespace doris::format::parquet { + +class RowPositionColumnReader final : public ParquetColumnReader { +public: + explicit RowPositionColumnReader(int64_t row_group_first_row, + ParquetColumnReaderProfile profile = {}); + + int file_column_id() const override; + int parquet_leaf_column_id() const override; + const DataTypePtr& type() const override; + const std::string& name() const override; + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override; + Status skip(int64_t rows) override; + +private: + int64_t _row_group_first_row = 0; // first file row of the current row group + int64_t _next_row_position = 0; // next row position to emit +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/scalar_column_reader.cpp b/be/src/format_v2/parquet/reader/scalar_column_reader.cpp new file mode 100644 index 00000000000000..3c90279b4412b4 --- /dev/null +++ b/be/src/format_v2/parquet/reader/scalar_column_reader.cpp @@ -0,0 +1,315 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/scalar_column_reader.h" + +#include + +#include +#include +#include + +#include "core/column/column.h" +#include "core/column/column_nullable.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "util/simd/bits.h" + +namespace doris::format::parquet { +namespace { + +class ParquetNestedScalarValueCursor { +public: + explicit ParquetNestedScalarValueCursor(const ParquetNestedScalarBatch* batch) { reset(batch); } + + void reset(const ParquetNestedScalarBatch* batch) { + DORIS_CHECK(batch != nullptr); + _batch = batch; + } + + Status value_index(const std::string& column_name, int64_t level_idx, int64_t* value_idx) { + DORIS_CHECK(_batch != nullptr); + DORIS_CHECK(value_idx != nullptr); + DORIS_CHECK(level_idx < _batch->levels_written); + DORIS_CHECK(level_idx >= 0); + DORIS_CHECK(static_cast(level_idx) < _batch->value_indices.size()); + const int64_t computed_value_idx = _batch->value_indices[static_cast(level_idx)]; + if (computed_value_idx < 0) { + return Status::Corruption("Nested parquet value is absent for column {}", column_name); + } + DORIS_CHECK(_batch->values_column.get() != nullptr); + if (computed_value_idx >= _batch->values_column->size()) { + return Status::Corruption("Nested parquet value index is out of range for column {}", + column_name); + } + *value_idx = computed_value_idx; + return Status::OK(); + } + +private: + const ParquetNestedScalarBatch* _batch = nullptr; +}; + +Status append_scalar_batch_value(const ScalarColumnReader& column_reader, + const ParquetNestedScalarBatch& batch, int64_t level_idx, + ParquetNestedScalarValueCursor* value_cursor, + MutableColumnPtr& column) { + DORIS_CHECK(value_cursor != nullptr); + int64_t value_idx = -1; + RETURN_IF_ERROR(value_cursor->value_index(column_reader.name(), level_idx, &value_idx)); + auto* nullable_column = check_and_get_column(*column); + if (nullable_column != nullptr) { + nullable_column->get_nested_column().insert_from(*batch.values_column, + static_cast(value_idx)); + nullable_column->get_null_map_data().push_back(0); + return Status::OK(); + } + column->insert_from(*batch.values_column, static_cast(value_idx)); + return Status::OK(); +} + +} // namespace + +ScalarColumnReader::ScalarColumnReader( + const ParquetColumnSchema& column_schema, + std::shared_ptr<::parquet::internal::RecordReader> record_reader, + const ParquetPageSkipPlan* page_skip_plan, const cctz::time_zone* timezone, + bool enable_strict_mode, ParquetColumnReaderProfile profile) + : ParquetColumnReader(column_schema, column_schema.type, profile), + _descriptor(column_schema.descriptor), + _type_descriptor(column_schema.type_descriptor), + _record_reader(std::move(record_reader)), + _page_skip_plan(page_skip_plan), + _timezone(timezone), + _enable_strict_mode(enable_strict_mode), + _nested_batch(std::make_unique()) {} + +ScalarColumnReader::~ScalarColumnReader() = default; + +Status ScalarColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) { + if (column.get() == nullptr || rows_read == nullptr) { + return Status::InvalidArgument("Invalid parquet column read result pointer for column {}", + _name); + } + if (_record_reader == nullptr) { + return Status::InternalError("Parquet record reader is not initialized for column {}", + _name); + } + auto reader = leaf_reader(); + ParquetLeafBatch leaf_batch; + RETURN_IF_ERROR(reader.read_batch(rows, &leaf_batch, rows_read)); + + NullMap null_map; + RETURN_IF_ERROR(reader.build_null_map(leaf_batch, *rows_read, &null_map)); + const auto value_kind = decoded_value_kind(_type_descriptor); + const bool is_binary_value = + value_kind == DecodedValueKind::BINARY || value_kind == DecodedValueKind::FIXED_BINARY; + if (!is_binary_value && leaf_batch.read_dense_for_nullable() && !null_map.empty()) { + const int64_t non_null_count = static_cast(simd::count_zero_num( + reinterpret_cast(null_map.data()), null_map.size())); + const int64_t null_count = *rows_read - non_null_count; + if (leaf_batch.values_written() != non_null_count) { + return Status::Corruption( + "Invalid dense nullable parquet record read result for column {}: values={}, " + "records={}, nulls={}", + _name, leaf_batch.values_written(), *rows_read, null_count); + } + } else if (!is_binary_value && !leaf_batch.read_dense_for_nullable() && + leaf_batch.values_written() != *rows_read) { + return Status::Corruption( + "Invalid parquet record read result for column {}: values={}, records={}", _name, + leaf_batch.values_written(), *rows_read); + } + + RETURN_IF_ERROR(reader.append_values(leaf_batch, *rows_read, &null_map, column)); + advance_rows_read(*rows_read); + update_reader_read_rows(*rows_read); + return Status::OK(); +} + +Status ScalarColumnReader::skip_records(int64_t rows) { + if (_record_reader == nullptr) { + return Status::InternalError("Parquet record reader is not initialized for column {}", + _name); + } + if (rows <= 0) { + return Status::OK(); + } + int64_t skipped_rows = 0; + try { + _record_reader->Reset(); + while (skipped_rows < rows) { + const int64_t skipped = _record_reader->SkipRecords(rows - skipped_rows); + if (skipped <= 0) { + return Status::Corruption( + "Failed to skip parquet records for column {}: skipped {} of {} rows", + _name, skipped_rows, rows); + } + skipped_rows += skipped; + } + } catch (const ::parquet::ParquetException& e) { + return Status::Corruption("Failed to skip parquet records for column {}: {}", _name, + e.what()); + } catch (const std::exception& e) { + return Status::InternalError("Failed to skip parquet records for column {}: {}", _name, + e.what()); + } + update_reader_skip_rows(rows); + return Status::OK(); +} + +int64_t ScalarColumnReader::page_filtered_rows_to_skip(int64_t rows) const { + if (_page_skip_plan == nullptr || rows <= 0) { + return 0; + } + const int64_t skip_end = _row_group_rows_read + rows; + int64_t filtered_rows = 0; + for (const auto& range : _page_skip_plan->skipped_ranges) { + const int64_t range_end = range.start + range.length; + if (range_end <= _row_group_rows_read) { + continue; + } + if (range.start >= skip_end) { + break; + } + const int64_t start = std::max(range.start, _row_group_rows_read); + const int64_t end = std::min(range_end, skip_end); + if (start < end) { + // Scheduler gap skips are derived from page-index selected_ranges. A page-filtered + // range can only overlap such a gap when the whole data page is outside every selected + // range, so partial overlap would mean the planner and scheduler are out of sync. + DORIS_CHECK(start == range.start); + DORIS_CHECK(end == range_end); + filtered_rows += end - start; + } + } + return filtered_rows; +} + +void ScalarColumnReader::advance_rows_read(int64_t rows) { + DORIS_CHECK(rows >= 0); + _row_group_rows_read += rows; +} + +Status ScalarColumnReader::skip(int64_t rows) { + if (rows <= 0) { + return Status::OK(); + } + + const int64_t page_filtered_rows = page_filtered_rows_to_skip(rows); + DORIS_CHECK(page_filtered_rows <= rows); + const int64_t record_reader_skip_rows = rows - page_filtered_rows; + RETURN_IF_ERROR(skip_records(record_reader_skip_rows)); + advance_rows_read(rows); + return Status::OK(); +} + +// The value index stream must advance on those null slots, otherwise later payload values shift. +Status ScalarColumnReader::load_nested_batch(int64_t rows) { + DORIS_CHECK(_nested_batch != nullptr); + reset_nested_build_level_cursor(); + const int16_t materialized_slot_definition_level = + static_cast(_definition_level - (_type->is_nullable() ? 1 : 0)); + RETURN_IF_ERROR(leaf_reader().read_nested_batch(rows, materialized_slot_definition_level, + _nested_batch.get(), _repetition_level)); + advance_rows_read(_nested_batch->records_read); + update_reader_read_rows(_nested_batch->records_read); + return Status::OK(); +} + +Status ScalarColumnReader::load_nested_levels_batch(int64_t rows) { + DORIS_CHECK(_nested_batch != nullptr); + reset_nested_build_level_cursor(); + RETURN_IF_ERROR(leaf_reader().read_nested_levels_batch(rows, _nested_batch.get())); + advance_rows_read(_nested_batch->records_read); + update_reader_read_rows(_nested_batch->records_read); + return Status::OK(); +} + +Status ScalarColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) { + if (column.get() == nullptr || values_read == nullptr) { + return Status::InvalidArgument("Invalid parquet nested scalar build result for column {}", + _name); + } + DORIS_CHECK(_nested_batch != nullptr); + ParquetNestedScalarValueCursor value_cursor(_nested_batch.get()); + const int16_t materialized_slot_definition_level = _nested_batch->value_slot_definition_level; + *values_read = 0; + int64_t level_idx = nested_build_level_cursor(); + while (level_idx < _nested_batch->levels_written && *values_read < length_upper_bound) { + const int64_t current_level_idx = level_idx; + const int16_t def_level = _nested_batch->def_levels[current_level_idx]; + const int16_t rep_level = _nested_batch->rep_levels[current_level_idx]; + ++level_idx; + if (def_level < materialized_slot_definition_level || rep_level > _repetition_level) { + continue; + } + if (def_level == _definition_level) { + RETURN_IF_ERROR(append_scalar_batch_value(*this, *_nested_batch, current_level_idx, + &value_cursor, column)); + } else { + if (!_type->is_nullable() && def_level >= _nullable_definition_level) { + return Status::Corruption( + "Parquet scalar column {} contains null for non-nullable field", _name); + } + column->insert_default(); + } + ++*values_read; + } + set_nested_build_level_cursor(level_idx); + return Status::OK(); +} + +Status ScalarColumnReader::append_nested_value(int64_t level_idx, MutableColumnPtr& column) const { + if (column.get() == nullptr) { + return Status::InvalidArgument("Invalid parquet nested scalar append result for column {}", + _name); + } + DORIS_CHECK(_nested_batch != nullptr); + DORIS_CHECK(level_idx >= 0); + DORIS_CHECK(level_idx < _nested_batch->levels_written); + ParquetNestedScalarValueCursor value_cursor(_nested_batch.get()); + const int16_t def_level = _nested_batch->def_levels[level_idx]; + if (def_level == _definition_level) { + return append_scalar_batch_value(*this, *_nested_batch, level_idx, &value_cursor, column); + } + if (!_type->is_nullable()) { + return Status::Corruption("Parquet MAP column {} contains null for non-nullable value", + _name); + } + column->insert_default(); + return Status::OK(); +} + +const std::vector& ScalarColumnReader::nested_definition_levels() const { + DORIS_CHECK(_nested_batch != nullptr); + return _nested_batch->def_levels; +} + +const std::vector& ScalarColumnReader::nested_repetition_levels() const { + DORIS_CHECK(_nested_batch != nullptr); + return _nested_batch->rep_levels; +} + +int64_t ScalarColumnReader::nested_levels_written() const { + DORIS_CHECK(_nested_batch != nullptr); + return _nested_batch->levels_written; +} + +bool ScalarColumnReader::is_or_has_repeated_child() const { + return _repetition_level > 0; +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/scalar_column_reader.h b/be/src/format_v2/parquet/reader/scalar_column_reader.h new file mode 100644 index 00000000000000..ab7ba0d7e54388 --- /dev/null +++ b/be/src/format_v2/parquet/reader/scalar_column_reader.h @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "format_v2/parquet/parquet_type.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/parquet/reader/parquet_leaf_reader.h" + +namespace parquet { +class ColumnDescriptor; + +namespace internal { +class RecordReader; +} // namespace internal +} // namespace parquet + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris::format::parquet { + +struct ScalarColumnReaderTestAccess; + +// load_nested_batch() / build_nested_column() +class ScalarColumnReader final : public ParquetColumnReader { + friend class MapColumnReader; + friend struct ScalarColumnReaderTestAccess; + +public: + ScalarColumnReader(const ParquetColumnSchema& column_schema, + std::shared_ptr<::parquet::internal::RecordReader> record_reader, + const ParquetPageSkipPlan* page_skip_plan = nullptr, + const cctz::time_zone* timezone = nullptr, bool enable_strict_mode = false, + ParquetColumnReaderProfile profile = {}); + ~ScalarColumnReader() override; + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override; + Status skip(int64_t rows) override; + + Status load_nested_batch(int64_t rows) override; + Status load_nested_levels_batch(int64_t rows) override; + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override; + const std::vector& nested_definition_levels() const override; + const std::vector& nested_repetition_levels() const override; + int64_t nested_levels_written() const override; + bool is_or_has_repeated_child() const override; + +private: + Status append_nested_value(int64_t level_idx, MutableColumnPtr& column) const; + + const ::parquet::ColumnDescriptor* descriptor() const { return _descriptor; } + + ParquetLeafReader leaf_reader() const { + return ParquetLeafReader(_descriptor, _type_descriptor, _type, _name, _record_reader, + _profile, _timezone, _enable_strict_mode); + } + + void advance_rows_read(int64_t rows); + Status skip_records(int64_t rows); + int64_t page_filtered_rows_to_skip(int64_t rows) const; + + const ::parquet::ColumnDescriptor* _descriptor = nullptr; // Arrow column descriptor + ParquetTypeDescriptor _type_descriptor; // type encoding information + std::shared_ptr<::parquet::internal::RecordReader> + _record_reader; // Arrow physical column reader + const ParquetPageSkipPlan* _page_skip_plan = + nullptr; // page-index pruning result (may be nullptr) + const cctz::time_zone* _timezone = nullptr; // timezone + bool _enable_strict_mode = false; // strict mode + int64_t _row_group_rows_read = 0; // rows read in the current row group (cursor) + std::unique_ptr _nested_batch; // intermediate result for nested reads +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/struct_column_reader.cpp b/be/src/format_v2/parquet/reader/struct_column_reader.cpp new file mode 100644 index 00000000000000..66e450c567133a --- /dev/null +++ b/be/src/format_v2/parquet/reader/struct_column_reader.cpp @@ -0,0 +1,258 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/struct_column_reader.h" + +#include +#include +#include +#include + +#include "core/column/column_struct.h" +#include "format_v2/parquet/reader/nested_column_materializer.h" +#include "format_v2/parquet/reader/scalar_column_reader.h" + +namespace doris::format::parquet { + +ParquetColumnReader* StructColumnReader::shape_source_reader() const { + for (const auto& child : _children) { + auto* child_reader = child.get(); + DORIS_CHECK(child_reader != nullptr); + if (!child_reader->is_or_has_repeated_child()) { + return child_reader; + } + } + if (_children.empty()) { + return nullptr; + } + return _children[0].get(); +} + +Status StructColumnReader::advance_child_past_null_parent(ParquetColumnReader* child_reader, + int64_t parent_level_idx) const { + DORIS_CHECK(child_reader != nullptr); + const int64_t next_child_cursor = parent_level_idx + 1; + if (auto* scalar_child = dynamic_cast(child_reader)) { + if (next_child_cursor > scalar_child->nested_levels_written()) { + return Status::Corruption( + "Parquet STRUCT child {} ended before null parent row in column {}", + scalar_child->name(), _name); + } + scalar_child->set_nested_build_level_cursor( + std::max(scalar_child->nested_build_level_cursor(), next_child_cursor)); + return Status::OK(); + } + if (auto* struct_child = dynamic_cast(child_reader); + struct_child != nullptr && !struct_child->is_or_has_repeated_child()) { + if (next_child_cursor > struct_child->nested_levels_written()) { + return Status::Corruption( + "Parquet STRUCT child {} ended before null parent row in column {}", + struct_child->name(), _name); + } + struct_child->set_nested_build_level_cursor( + std::max(struct_child->nested_build_level_cursor(), next_child_cursor)); + for (auto& grandchild : struct_child->_children) { + RETURN_IF_ERROR(struct_child->advance_child_past_null_parent(grandchild.get(), + parent_level_idx)); + } + return Status::OK(); + } + + int64_t child_cursor = child_reader->nested_build_level_cursor(); + const auto& child_rep_levels = child_reader->nested_repetition_levels(); + const int64_t child_levels_written = child_reader->nested_levels_written(); + while (child_cursor < child_levels_written) { + const int16_t child_rep_level = child_rep_levels[child_cursor]; + ++child_cursor; + if (!child_reader->is_or_has_repeated_child() || child_rep_level <= _repetition_level) { + break; + } + } + child_reader->set_nested_build_level_cursor(child_cursor); + return Status::OK(); +} + +Status StructColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) { + RETURN_IF_ERROR(load_nested_batch(rows)); + return build_nested_column(rows, column, rows_read); +} + +Status StructColumnReader::skip(int64_t rows) { + if (rows <= 0) { + return Status::OK(); + } + auto scratch_column = _type->create_column(); + RETURN_IF_ERROR(load_nested_batch(rows)); + int64_t rows_read = 0; + RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read)); + if (rows_read != rows) { + return Status::Corruption("Failed to skip parquet STRUCT column {}: skipped {} of {} rows", + _name, rows_read, rows); + } + update_reader_skip_rows(rows); + return Status::OK(); +} + +Status StructColumnReader::load_nested_batch(int64_t rows) { + reset_nested_build_level_cursor(); + for (auto& child_reader : _children) { + DORIS_CHECK(child_reader != nullptr); + RETURN_IF_ERROR(child_reader->load_nested_batch(rows)); + } + return Status::OK(); +} + +Status StructColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) { + if (column.get() == nullptr || values_read == nullptr) { + return Status::InvalidArgument("Invalid parquet struct build result pointer for column {}", + _name); + } + if (_children.empty()) { + column->resize(column->size() + static_cast(length_upper_bound)); + *values_read = length_upper_bound; + return Status::OK(); + } + auto* struct_column = struct_column_from_output(column); + DORIS_CHECK(struct_column != nullptr); + auto* parent_null_map = null_map_from_nullable_output(column); + auto* shape_reader = shape_source_reader(); + DORIS_CHECK(shape_reader != nullptr); + const auto& def_levels = shape_reader->nested_definition_levels(); + const auto& rep_levels = shape_reader->nested_repetition_levels(); + const int64_t levels_written = shape_reader->nested_levels_written(); + + NullMap parent_nulls; + std::vector parent_level_indices; + *values_read = 0; + int64_t level_idx = nested_build_level_cursor(); + while (level_idx < levels_written) { + const int64_t current_level_idx = level_idx; + const int16_t def_level = def_levels[level_idx]; + const int16_t rep_level = rep_levels[level_idx]; + const bool starts_parent = + !shape_reader->is_or_has_repeated_child() || rep_level <= _repetition_level; + if (starts_parent && *values_read >= length_upper_bound) { + break; + } + ++level_idx; + if (def_level < _repeated_ancestor_definition_level) { + continue; + } + if (shape_reader->is_or_has_repeated_child() && rep_level > _repetition_level) { + continue; + } + const bool parent_is_null = def_level < _nullable_definition_level; + if (parent_is_null && parent_null_map == nullptr) { + return Status::Corruption( + "Parquet STRUCT column {} contains null for non-nullable struct", _name); + } + parent_nulls.push_back(parent_is_null); + parent_level_indices.push_back(current_level_idx); + ++*values_read; + } + set_nested_build_level_cursor(level_idx); + + std::vector child_columns; + child_columns.reserve(struct_column->get_columns().size()); + for (size_t child_idx = 0; child_idx < struct_column->get_columns().size(); ++child_idx) { + child_columns.push_back(struct_column->get_column_ptr(child_idx)->assert_mutable()); + } + for (size_t child_idx = 0; child_idx < _children.size(); ++child_idx) { + const int output_idx = _child_output_indices[child_idx]; + if (output_idx < 0) { + continue; + } + // STRUCT owns row alignment. Child readers consume only present parent rows from their + // level streams; null STRUCT parents become default placeholders in every child column. + // This mirrors Arrow's separation between struct validity and child array materialization, + // and avoids asking scalar/list/map children to invent values for an absent parent. + int64_t pending_present_rows = 0; + int64_t total_child_rows = 0; + auto flush_present_rows = [&]() -> Status { + if (pending_present_rows == 0) { + return Status::OK(); + } + int64_t child_rows = 0; + RETURN_IF_ERROR(_children[child_idx]->build_nested_column( + pending_present_rows, child_columns[output_idx], &child_rows)); + if (child_rows != pending_present_rows) { + return Status::Corruption( + "Parquet STRUCT child {} built {} rows, expected {} for column {}", + _children[child_idx]->name(), child_rows, pending_present_rows, _name); + } + total_child_rows += child_rows; + pending_present_rows = 0; + return Status::OK(); + }; + for (size_t parent_idx = 0; parent_idx < parent_nulls.size(); ++parent_idx) { + const auto parent_is_null = parent_nulls[parent_idx]; + if (!parent_is_null) { + ++pending_present_rows; + continue; + } + RETURN_IF_ERROR(flush_present_rows()); + child_columns[output_idx]->insert_default(); + RETURN_IF_ERROR(advance_child_past_null_parent(_children[child_idx].get(), + parent_level_indices[parent_idx])); + ++total_child_rows; + } + RETURN_IF_ERROR(flush_present_rows()); + if (total_child_rows != *values_read) { + return Status::Corruption( + "Parquet STRUCT child {} built {} rows, expected {} for column {}", + _children[child_idx]->name(), total_child_rows, *values_read, _name); + } + } + for (size_t child_idx = 0; child_idx < child_columns.size(); ++child_idx) { + struct_column->get_column_ptr(child_idx) = std::move(child_columns[child_idx]); + } + append_parent_nulls(parent_null_map, parent_nulls); + return Status::OK(); +} + +const std::vector& StructColumnReader::nested_definition_levels() const { + auto* shape_reader = shape_source_reader(); + DORIS_CHECK(shape_reader != nullptr); + return shape_reader->nested_definition_levels(); +} + +const std::vector& StructColumnReader::nested_repetition_levels() const { + auto* shape_reader = shape_source_reader(); + DORIS_CHECK(shape_reader != nullptr); + return shape_reader->nested_repetition_levels(); +} + +int64_t StructColumnReader::nested_levels_written() const { + auto* shape_reader = shape_source_reader(); + DORIS_CHECK(shape_reader != nullptr); + return shape_reader->nested_levels_written(); +} + +bool StructColumnReader::is_or_has_repeated_child() const { + auto* shape_reader = shape_source_reader(); + return shape_reader != nullptr && shape_reader->is_or_has_repeated_child(); +} + +void StructColumnReader::advance_nested_build_level_cursor_past_parent( + int16_t parent_repetition_level) { + ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level); + for (auto& child : _children) { + DORIS_CHECK(child != nullptr); + child->advance_nested_build_level_cursor_past_parent(parent_repetition_level); + } +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/reader/struct_column_reader.h b/be/src/format_v2/parquet/reader/struct_column_reader.h new file mode 100644 index 00000000000000..3e88b75cede3d9 --- /dev/null +++ b/be/src/format_v2/parquet/reader/struct_column_reader.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/reader/column_reader.h" + +namespace doris::format::parquet { + +class StructColumnReader final : public ParquetColumnReader { +public: + StructColumnReader(const ParquetColumnSchema& schema, DataTypePtr type, + std::vector> children, + std::vector child_output_indices, + ParquetColumnReaderProfile profile = {}) + : ParquetColumnReader(schema, type, profile), + _children(std::move(children)), + _child_output_indices(std::move(child_output_indices)) { + DCHECK_EQ(_children.size(), _child_output_indices.size()); + } + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override; + Status skip(int64_t rows) override; + Status load_nested_batch(int64_t rows) override; + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override; + const std::vector& nested_definition_levels() const override; + const std::vector& nested_repetition_levels() const override; + int64_t nested_levels_written() const override; + bool is_or_has_repeated_child() const override; + void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override; + +private: + ParquetColumnReader* shape_source_reader() const; + Status advance_child_past_null_parent(ParquetColumnReader* child_reader, + int64_t parent_level_idx) const; + + std::vector> _children; // projected child readers + std::vector _child_output_indices; // child reader -> struct output position mapping +}; + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/parquet/selection_vector.h b/be/src/format_v2/parquet/selection_vector.h new file mode 100644 index 00000000000000..589154d4acc0e4 --- /dev/null +++ b/be/src/format_v2/parquet/selection_vector.h @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "common/check.h" +#include "common/status.h" + +namespace doris::format::parquet { + +struct RowRange { + int64_t start = 0; + int64_t length = 0; +}; + +struct ParquetPageSkipPlan { + int leaf_column_id = -1; + // Page ordinal is the data-page ordinal in the column chunk. It intentionally excludes + // dictionary pages, matching Arrow PageReader::set_data_page_filter(). + std::vector skipped_pages; + std::vector skipped_page_compressed_sizes; + // Row ranges covered by skipped data pages. ScalarColumnReader uses these ranges to avoid + // calling RecordReader::SkipRecords() again for pages already skipped by Arrow. + std::vector skipped_ranges; + + bool empty() const { return skipped_ranges.empty(); } + + bool should_skip_page(size_t page_idx) const { + return page_idx < skipped_pages.size() && skipped_pages[page_idx] != 0; + } + + int64_t skipped_page_compressed_size(size_t page_idx) const { + DCHECK_LT(page_idx, skipped_page_compressed_sizes.size()); + return skipped_page_compressed_sizes[page_idx]; + } +}; + +class SelectionVector { +public: + using Index = uint16_t; + + SelectionVector() = default; + + explicit SelectionVector(size_t count) { resize(count); } + + SelectionVector(Index* data, size_t count) { initialize(data, count); } + + void initialize(Index* data, size_t count) { + _owned.clear(); + _data = data; + _size = count; + } + + void resize(size_t count) { + _owned.resize(count); + _data = _owned.data(); + _size = count; + for (size_t idx = 0; idx < count; ++idx) { + _data[idx] = static_cast(idx); + } + } + + void clear() { + _owned.clear(); + _data = nullptr; + _size = 0; + } + + size_t size() const { return _size; } + + bool is_set() const { return _data != nullptr; } + + Index* data() { return _data; } + + const Index* data() const { return _data; } + + size_t get_index(size_t idx) const { + if (_data == nullptr) { + return idx; + } + return _data[idx]; + } + + void set_index(size_t idx, Index value) { _data[idx] = value; } + + Status verify(size_t count, int64_t batch_rows) const { + if (batch_rows < 0) { + return Status::InvalidArgument("Negative parquet selection batch rows {}", batch_rows); + } + if (std::cmp_greater(count, batch_rows)) { + return Status::InvalidArgument("Parquet selection count {} exceeds batch rows {}", + count, batch_rows); + } + if (_data != nullptr && count > _size) { + return Status::InvalidArgument("Parquet selection count {} exceeds vector size {}", + count, _size); + } + size_t previous = 0; + for (size_t idx = 0; idx < count; ++idx) { + const size_t current = get_index(idx); + if (std::cmp_greater_equal(current, batch_rows)) { + return Status::InvalidArgument( + "Parquet selection index {} out of range [0, {}) at position {}", current, + batch_rows, idx); + } + if (idx > 0 && current <= previous) { + return Status::InvalidArgument( + "Parquet selection index {} is not strictly greater than previous {} at " + "position {}", + current, previous, idx); + } + previous = current; + } + return Status::OK(); + } + +private: + std::vector _owned; + Index* _data = nullptr; + size_t _size = 0; +}; + +inline std::vector selection_to_ranges(const SelectionVector& selection, + uint16_t selected_rows) { + std::vector ranges; + if (selected_rows == 0) { + return ranges; + } + + int64_t range_start = selection.get_index(0); + int64_t previous = selection.get_index(0); + for (uint16_t selection_idx = 1; selection_idx < selected_rows; ++selection_idx) { + const int64_t current = selection.get_index(selection_idx); + if (current == previous + 1) { + previous = current; + continue; + } + ranges.push_back(RowRange {.start = range_start, .length = previous - range_start + 1}); + range_start = current; + previous = current; + } + ranges.push_back(RowRange {.start = range_start, .length = previous - range_start + 1}); + return ranges; +} + +} // namespace doris::format::parquet diff --git a/be/src/format_v2/schema_projection.cpp b/be/src/format_v2/schema_projection.cpp new file mode 100644 index 00000000000000..342f4c91898c92 --- /dev/null +++ b/be/src/format_v2/schema_projection.cpp @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/schema_projection.h" + +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" + +namespace doris::format { +namespace { + +// Rebuild the complex DataType for one already-pruned semantic ColumnDefinition node. +// +// The caller has already matched the projection against ColumnDefinition::children and preserved +// the file-local child order. This helper only mirrors those projected semantic children into the +// node type. It intentionally does not understand physical format wrappers. In particular, a MAP +// node is expected to have semantic children [key, value], even if the underlying format stores a +// wrapper such as Parquet key_value/entry. +Status rebuild_semantic_projected_type(const DataTypePtr& original_type, + const std::vector& projected_children, + DataTypePtr* projected_type) { + DORIS_CHECK(original_type != nullptr); + DORIS_CHECK(projected_type != nullptr); + + DataTypePtr nested_projected_type; + const auto primitive_type = remove_nullable(original_type)->get_primitive_type(); + switch (primitive_type) { + case TYPE_STRUCT: { + DataTypes child_types; + Strings child_names; + child_types.reserve(projected_children.size()); + child_names.reserve(projected_children.size()); + for (const auto& child : projected_children) { + child_types.push_back(child.type); + child_names.push_back(child.name); + } + nested_projected_type = std::make_shared(child_types, child_names); + break; + } + case TYPE_ARRAY: + DORIS_CHECK(projected_children.size() == 1); + nested_projected_type = std::make_shared(projected_children[0].type); + break; + case TYPE_MAP: { + DORIS_CHECK(remove_nullable(original_type)->get_primitive_type() == TYPE_MAP); + const auto* original_map_type = + assert_cast(remove_nullable(original_type).get()); + DataTypePtr key_type = original_map_type->get_key_type(); + DataTypePtr value_type; + for (const auto& child : projected_children) { + // Partial MAP projection only prunes the value subtree. The key stream must remain + // complete because it defines entry existence and offsets when materializing ColumnMap; + // the projected DataTypeMap also preserves the original key type instead of rebuilding + // it from children. If a caller includes key in the semantic child list, ignore it + // here; the presence of a value child still decides the projected value shape. + if (child.file_local_id() == 0 || child.name == "key") { + continue; + } + if (child.file_local_id() == 1 || child.name == "value") { + value_type = child.type; + } + } + if (value_type == nullptr) { + return Status::NotSupported("MAP projection for type {} contains no value child", + original_type->get_name()); + } + nested_projected_type = std::make_shared(key_type, value_type); + break; + } + default: + return Status::InvalidArgument("Cannot project children from non-complex type {}", + original_type->get_name()); + } + + *projected_type = original_type->is_nullable() ? make_nullable(nested_projected_type) + : nested_projected_type; + return Status::OK(); +} + +} // namespace + +Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection, + ColumnDefinition* projected_field) { + if (projected_field == nullptr) { + return Status::InvalidArgument("projected_field is null"); + } + *projected_field = field; + if (projection.project_all_children || projection.children.empty()) { + return Status::OK(); + } + + projected_field->children.clear(); + for (const auto& child_projection : projection.children) { + if (child_projection.local_id() == -1) { + return Status::InvalidArgument("Empty projection path for field {}", field.name); + } + const auto child_it = + std::ranges::find_if(field.children, [&](const ColumnDefinition& child) { + return child.file_local_id() == child_projection.local_id(); + }); + if (child_it == field.children.end()) { + return Status::InvalidArgument("Invalid projection child id {} for field {}", + child_projection.local_id(), field.name); + } + } + for (const auto& child : field.children) { + const auto child_projection_it = + std::ranges::find_if(projection.children, [&](const LocalColumnIndex& child_proj) { + return child_proj.local_id() == child.file_local_id(); + }); + if (child_projection_it == projection.children.end()) { + continue; + } + ColumnDefinition projected_child; + RETURN_IF_ERROR(project_column_definition(child, *child_projection_it, &projected_child)); + projected_field->children.push_back(std::move(projected_child)); + } + if (projected_field->children.empty()) { + return Status::NotSupported("Projection for field {} contains no children", field.name); + } + + return rebuild_semantic_projected_type(field.type, projected_field->children, + &projected_field->type); +} + +} // namespace doris::format diff --git a/be/src/format_v2/schema_projection.h b/be/src/format_v2/schema_projection.h new file mode 100644 index 00000000000000..c2125d66931631 --- /dev/null +++ b/be/src/format_v2/schema_projection.h @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" +#include "format_v2/file_reader.h" + +namespace doris::format { + +// Build a projected file-local semantic schema node from a full schema node and a nested +// LocalColumnIndex projection. +// +// This module is deliberately about semantic ColumnDefinition trees, not physical file-format +// trees. FileReader::get_schema() returns file-local columns after type conversion to Doris +// DataType, and their children must follow Doris semantics: +// +// STRUCT children = fields +// ARRAY children = [element] +// MAP children = [key, value] +// +// Format-specific wrappers, such as Parquet MAP key_value/entry nodes, are intentionally hidden +// from this API. A format reader that needs those wrappers for its physical reader tree should +// translate the semantic projection back to its physical layout internally. +// +// The function does three things: +// - Copies `field` metadata to `projected_field`. +// - Recursively prunes children according to `projection.children`, matching children by +// ColumnDefinition::file_local_id() rather than vector ordinal. The root projection id is not +// interpreted here because the caller has already selected `field`. +// - Rebuilds the node DataType from the projected semantic children so the returned definition is +// self-consistent. STRUCT uses projected child names/types, ARRAY uses the projected element +// type, and MAP preserves the original key type while rebuilding the projected value type. +// +// A full projection copies `field` unchanged. Partial MAP projection only uses the value child for +// type rebuilding. MAP is materialized as offsets + keys + values, so the reader must still read +// the complete key stream to build entry shape and offsets. If the semantic projection includes +// the key child, it is ignored here; key-only MAP projections are rejected because they do not +// define a value shape. +Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection, + ColumnDefinition* projected_field); + +} // namespace doris::format diff --git a/be/src/format_v2/table/hive_reader.cpp b/be/src/format_v2/table/hive_reader.cpp new file mode 100644 index 00000000000000..71a5a7ad57cfd9 --- /dev/null +++ b/be/src/format_v2/table/hive_reader.cpp @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/hive_reader.h" + +#include "common/consts.h" +#include "format_v2/column_mapper.h" +#include "format_v2/file_reader.h" +#include "runtime/runtime_state.h" + +namespace doris::format::hive { +namespace { + +TFileFormatType::type format_type_from_context(const format::ProjectedColumnBuildContext& context) { + DORIS_CHECK(context.scan_params != nullptr); + if (context.range != nullptr && context.range->__isset.format_type) { + return context.range->format_type; + } + return context.scan_params->format_type; +} + +bool use_column_position_mapping(const format::ProjectedColumnBuildContext& context) { + if (context.runtime_state == nullptr || context.scan_params == nullptr) { + return false; + } + switch (format_type_from_context(context)) { + case TFileFormatType::FORMAT_PARQUET: + return !context.runtime_state->query_options().hive_parquet_use_column_names; + default: + return false; + } +} + +bool is_file_column_position_slot(const TFileScanSlotInfo& slot_info, + const std::string& column_name) { + if (column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) || + column_name == BeConsts::ICEBERG_ROWID_COL) { + return false; + } + if (slot_info.__isset.is_file_slot) { + return slot_info.is_file_slot; + } + return !slot_info.__isset.category || slot_info.category != TColumnCategory::PARTITION_KEY; +} + +} // namespace + +Status HiveReader::prepare_split(const format::SplitReadOptions& options) { + if (options.current_split_format != _format) { + return Status::InternalError( + "Hive scan expects all splits to use the same file format, " + "initialized_format={}, current_split_format={}", + static_cast(_format), static_cast(options.current_split_format)); + } + return format::TableReader::prepare_split(options); +} + +format::TableColumnMappingMode HiveReader::mapping_mode() const { + // Hive-specific behavior: choose the column matching mode based on file format and the + // matching session variable. + // - hive_orc_use_column_names / hive_parquet_use_column_names == true + // => BY_NAME (modern Hive default, match by column name) + // - those options == false + // => BY_INDEX (mainly for Hive1 ORC `_col0` / `_col1`, match by top-level position; + // Parquet exposes the same switch for consistency) + // TableReader updates `_format` in prepare_split(), so this is evaluated per split. + DORIS_CHECK(_runtime_state != nullptr); + const auto& query_options = _runtime_state->query_options(); + bool use_column_names = true; + if (_format == format::FileFormat::ORC) { + use_column_names = query_options.hive_orc_use_column_names; + } else if (_format == format::FileFormat::PARQUET) { + use_column_names = query_options.hive_parquet_use_column_names; + } else if (_format == format::FileFormat::CSV || _format == format::FileFormat::TEXT || + _format == format::FileFormat::JSON) { + // Hive CSV/TEXT/JSON readers synthesize a file-local schema from FE-provided file slots + // because these formats do not carry embedded column names or field ids. The scan params' + // format-specific attributes still tell the physical reader how to read values, while the + // table-level mapper can safely match the synthesized file schema by table column name. + use_column_names = true; + } else { + DORIS_CHECK(false) << "HiveReader does not support this file reader format"; + } + + return use_column_names ? format::TableColumnMappingMode::BY_NAME + : format::TableColumnMappingMode::BY_INDEX; +} + +Status HiveReader::annotate_projected_column(const TFileScanSlotInfo& slot_info, + format::ProjectedColumnBuildContext* context, + format::ColumnDefinition* column) const { + RETURN_IF_ERROR(format::TableReader::annotate_projected_column(slot_info, context, column)); + DORIS_CHECK(context != nullptr); + DORIS_CHECK(column != nullptr); + if (!use_column_position_mapping(*context) || + !is_file_column_position_slot(slot_info, column->name)) { + return Status::OK(); + } + const auto* scan_params = context->scan_params; + DORIS_CHECK(scan_params != nullptr); + if (!scan_params->__isset.column_idxs || + context->next_file_column_idx >= scan_params->column_idxs.size()) { + return Status::InvalidArgument( + "Hive positional column mapping is missing file index for column '{}', " + "required file slot ordinal={}, column_idxs_size={}", + column->name, context->next_file_column_idx, + scan_params->__isset.column_idxs ? scan_params->column_idxs.size() : 0); + } + const auto file_index = scan_params->column_idxs[context->next_file_column_idx]; + if (file_index < 0) { + return Status::InvalidArgument( + "Hive positional column mapping has negative file index {} for column '{}'", + file_index, column->name); + } + column->identifier = Field::create_field(file_index); + ++context->next_file_column_idx; + return Status::OK(); +} + +Status HiveReader::validate_projected_columns( + const format::ProjectedColumnBuildContext& context) const { + if (!use_column_position_mapping(context)) { + return Status::OK(); + } + DORIS_CHECK(context.scan_params != nullptr); + if (context.scan_params->__isset.column_idxs && + context.next_file_column_idx != context.scan_params->column_idxs.size()) { + return Status::InvalidArgument( + "Hive positional column mapping has unused file indexes: consumed={}, " + "column_idxs_size={}", + context.next_file_column_idx, context.scan_params->column_idxs.size()); + } + return Status::OK(); +} + +} // namespace doris::format::hive diff --git a/be/src/format_v2/table/hive_reader.h b/be/src/format_v2/table/hive_reader.h new file mode 100644 index 00000000000000..50d21c663cc542 --- /dev/null +++ b/be/src/format_v2/table/hive_reader.h @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" +#include "format_v2/table_reader.h" + +namespace doris::format::hive { +// now hive self only support mixed with orc/parquet files in table and different partitions. +// But if mixed with orc/parquet files in table and same partition, will failed when read. +// now fe will plan table format for all files dirctly, and BE could not handle mixed files also. +class HiveReader final : public format::TableReader { +public: + ENABLE_FACTORY_CREATOR(HiveReader); + ~HiveReader() final = default; + + Status prepare_split(const format::SplitReadOptions& options) override; + format::TableColumnMappingMode mapping_mode() const override; + Status annotate_projected_column(const TFileScanSlotInfo& slot_info, + format::ProjectedColumnBuildContext* context, + format::ColumnDefinition* column) const override; + Status validate_projected_columns( + const format::ProjectedColumnBuildContext& context) const override; +}; + +} // namespace doris::format::hive diff --git a/be/src/format_v2/table/hudi_reader.cpp b/be/src/format_v2/table/hudi_reader.cpp new file mode 100644 index 00000000000000..d76be201067bd7 --- /dev/null +++ b/be/src/format_v2/table/hudi_reader.cpp @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/hudi_reader.h" + +#include + +#include "exprs/vexpr_context.h" +#include "format_v2/column_mapper.h" +#include "format_v2/jni/hudi_jni_reader.h" +#include "format_v2/table/schema_history_util.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::hudi { + +Status HudiReader::prepare_split(const format::SplitReadOptions& options) { + _split_schema_id = -1; + if (options.current_range.__isset.table_format_params && + options.current_range.table_format_params.__isset.hudi_params && + options.current_range.table_format_params.hudi_params.__isset.schema_id) { + _split_schema_id = options.current_range.table_format_params.hudi_params.schema_id; + } + return format::TableReader::prepare_split(options); +} + +format::TableColumnMappingMode HudiReader::mapping_mode() const { + return format::can_map_by_history_schema(_scan_params, _split_schema_id) + ? format::TableColumnMappingMode::BY_FIELD_ID + : format::TableColumnMappingMode::BY_NAME; +} + +Status HudiReader::annotate_file_schema(std::vector* file_schema) { + DORIS_CHECK(file_schema != nullptr); + if (mapping_mode() != format::TableColumnMappingMode::BY_FIELD_ID) { + return Status::OK(); + } + return format::annotate_file_schema_from_history(_scan_params, _split_schema_id, file_schema); +} + +Status HudiHybridReader::init(format::TableReadOptions&& options) { + return format::TableReader::init(std::move(options)); +} + +Status HudiHybridReader::prepare_split(const format::SplitReadOptions& options) { + RETURN_IF_ERROR(_ensure_current_split_reader(options)); + DORIS_CHECK(_current_split_reader != nullptr); + return _current_split_reader->prepare_split(options); +} + +Status HudiHybridReader::get_block(Block* block, bool* eos) { + DORIS_CHECK(_current_split_reader != nullptr); + return _current_split_reader->get_block(block, eos); +} + +Status HudiHybridReader::close() { + Status close_status = Status::OK(); + if (_native_reader != nullptr) { + close_status = _native_reader->close(); + } + if (_jni_reader != nullptr) { + auto status = _jni_reader->close(); + if (!status.ok() && close_status.ok()) { + close_status = std::move(status); + } + } + _current_split_reader = nullptr; + return close_status; +} + +Status HudiHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) { + DORIS_CHECK(_scan_params != nullptr); + if (_is_jni_split(*_scan_params, options.current_range)) { + if (_jni_reader == nullptr) { + _jni_reader = std::make_unique(); + RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI)); + } + _current_split_reader = _jni_reader.get(); + } else { + format::FileFormat file_format; + RETURN_IF_ERROR(_to_file_format(*_scan_params, options.current_range, &file_format)); + if (_native_reader == nullptr) { + _native_reader = format::hudi::HudiReader::create_unique(); + RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), file_format)); + } + _current_split_reader = _native_reader.get(); + } + return Status::OK(); +} + +Status HudiHybridReader::_init_child_reader(format::TableReader* reader, + format::FileFormat file_format) { + DORIS_CHECK(reader != nullptr); + VExprContextSPtrs conjuncts; + RETURN_IF_ERROR(_clone_conjuncts(&conjuncts)); + return reader->init({ + .projected_columns = _projected_columns, + .column_predicates = _table_column_predicates, + .conjuncts = std::move(conjuncts), + .format = file_format, + .scan_params = _scan_params, + .io_ctx = _io_ctx, + .runtime_state = _runtime_state, + .scanner_profile = _scanner_profile, + .push_down_agg_type = _push_down_agg_type, + .condition_cache_digest = _condition_cache_digest, + }); +} + +Status HudiHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const { + DORIS_CHECK(conjuncts != nullptr); + conjuncts->clear(); + conjuncts->reserve(_conjuncts.size()); + for (const auto& conjunct : _conjuncts) { + VExprSPtr root; + RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root)); + conjuncts->push_back(VExprContext::create_shared(std::move(root))); + } + return Status::OK(); +} + +TFileFormatType::type HudiHybridReader::_range_format_type(const TFileScanRangeParams& params, + const TFileRangeDesc& range) { + return range.__isset.format_type ? range.format_type : params.format_type; +} + +bool HudiHybridReader::_is_jni_split(const TFileScanRangeParams& params, + const TFileRangeDesc& range) { + return _range_format_type(params, range) == TFileFormatType::FORMAT_JNI; +} + +Status HudiHybridReader::_to_file_format(const TFileScanRangeParams& params, + const TFileRangeDesc& range, + format::FileFormat* file_format) { + DORIS_CHECK(file_format != nullptr); + const auto format_type = _range_format_type(params, range); + switch (format_type) { + case TFileFormatType::FORMAT_PARQUET: + *file_format = format::FileFormat::PARQUET; + return Status::OK(); + case TFileFormatType::FORMAT_ORC: + *file_format = format::FileFormat::ORC; + return Status::OK(); + default: + return Status::NotSupported("Unsupported native Hudi file format {}", + to_string(format_type)); + } +} + +} // namespace doris::format::hudi diff --git a/be/src/format_v2/table/hudi_reader.h b/be/src/format_v2/table/hudi_reader.h new file mode 100644 index 00000000000000..aeaaedf6ab6064 --- /dev/null +++ b/be/src/format_v2/table/hudi_reader.h @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "format_v2/table_reader.h" + +namespace doris::format::hudi { + +class HudiReader final : public format::TableReader { +public: + ENABLE_FACTORY_CREATOR(HudiReader); + ~HudiReader() final = default; + + Status prepare_split(const format::SplitReadOptions& options) override; + +#ifdef BE_TEST + void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; } + format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); } + Status TEST_annotate_file_schema(std::vector* file_schema) { + return annotate_file_schema(file_schema); + } +#endif + +protected: + format::TableColumnMappingMode mapping_mode() const override; + Status annotate_file_schema(std::vector* file_schema) override; + +private: + int64_t _split_schema_id = -1; +}; + +// Hudi MOR scans can contain both JNI splits that need log-file merge semantics and native +// data-file splits without delta logs in the same SplitSource. FileScannerV2 owns one table reader +// for the scanner lifetime, so this reader keeps native and JNI child readers internally and +// dispatches each split to the matching child reader. +class HudiHybridReader final : public format::TableReader { +public: + ~HudiHybridReader() override = default; + + Status init(format::TableReadOptions&& options) override; + Status prepare_split(const format::SplitReadOptions& options) override; + Status get_block(Block* block, bool* eos) override; + Status close() override; + +private: + Status _ensure_current_split_reader(const format::SplitReadOptions& options); + Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format); + Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const; + static TFileFormatType::type _range_format_type(const TFileScanRangeParams& params, + const TFileRangeDesc& range); + static bool _is_jni_split(const TFileScanRangeParams& params, const TFileRangeDesc& range); + static Status _to_file_format(const TFileScanRangeParams& params, const TFileRangeDesc& range, + format::FileFormat* file_format); + + std::unique_ptr _native_reader; // handle native parquet/orc splits + std::unique_ptr _jni_reader; // handle MOR JNI splits + format::TableReader* _current_split_reader = nullptr; +}; + +} // namespace doris::format::hudi diff --git a/be/src/format_v2/table/iceberg_reader.cpp b/be/src/format_v2/table/iceberg_reader.cpp new file mode 100644 index 00000000000000..ccc100f05044cc --- /dev/null +++ b/be/src/format_v2/table/iceberg_reader.cpp @@ -0,0 +1,797 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/iceberg_reader.h" + +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/define_primitive_type.h" +#include "core/field.h" +#include "exprs/vslot_ref.h" +#include "format/table/deletion_vector_reader.h" +#include "format_v2/expr/cast.h" +#include "format_v2/expr/equality_delete_predicate.h" +#include "format_v2/parquet/parquet_reader.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/table_reader.h" +#include "io/file_factory.h" + +namespace doris::format::iceberg { + +static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id"; +static constexpr int32_t ROW_LINEAGE_ROW_ID_FIELD_ID = 2147483540; + +template +static std::string join_values_for_debug(const std::vector& values) { + std::ostringstream out; + out << "["; + for (size_t idx = 0; idx < values.size(); ++idx) { + if (idx > 0) { + out << ", "; + } + out << values[idx]; + } + out << "]"; + return out.str(); +} + +static bool is_projected_row_lineage_row_id(const format::ColumnDefinition& column) { + // Iceberg row lineage columns can be bound by field id when a mapper has already been built, + // but customize_file_scan_request() is also exercised directly by scan-request tests before the + // mapper exists. In that path, inspect the projected table schema so row-position dependencies + // are still added for `_row_id`. + return column.name == ROW_LINEAGE_ROW_ID || + (column.has_identifier_field_id() && + column.get_identifier_field_id() == ROW_LINEAGE_ROW_ID_FIELD_ID); +} + +static bool is_projected_iceberg_rowid(const format::ColumnDefinition& column) { + return column.name == BeConsts::ICEBERG_ROWID_COL; +} + +static std::string iceberg_delete_file_debug_string(const TIcebergDeleteFileDesc& delete_file) { + std::ostringstream out; + out << "TIcebergDeleteFileDesc{path=" << (delete_file.__isset.path ? delete_file.path : "null") + << ", content=" << (delete_file.__isset.content ? delete_file.content : -1) + << ", file_format=" + << (delete_file.__isset.file_format ? static_cast(delete_file.file_format) : -1) + << ", position_lower_bound=" + << (delete_file.__isset.position_lower_bound ? delete_file.position_lower_bound : -1) + << ", position_upper_bound=" + << (delete_file.__isset.position_upper_bound ? delete_file.position_upper_bound : -1) + << ", field_ids=" + << (delete_file.__isset.field_ids ? join_values_for_debug(delete_file.field_ids) : "[]") + << ", content_offset=" + << (delete_file.__isset.content_offset ? delete_file.content_offset : -1) + << ", content_size_in_bytes=" + << (delete_file.__isset.content_size_in_bytes ? delete_file.content_size_in_bytes : -1) + << "}"; + return out.str(); +} + +static std::string iceberg_delete_files_debug_string( + const std::vector& delete_files) { + std::ostringstream out; + out << "["; + for (size_t idx = 0; idx < delete_files.size(); ++idx) { + if (idx > 0) { + out << ", "; + } + out << iceberg_delete_file_debug_string(delete_files[idx]); + } + out << "]"; + return out.str(); +} + +static std::string iceberg_params_debug_string(const std::optional& params) { + if (!params.has_value()) { + return "null"; + } + const auto& iceberg_params = *params; + std::ostringstream out; + out << "TIcebergFileDesc{format_version=" + << (iceberg_params.__isset.format_version ? iceberg_params.format_version : -1) + << ", content=" << (iceberg_params.__isset.content ? iceberg_params.content : -1) + << ", original_file_path=" + << (iceberg_params.__isset.original_file_path ? iceberg_params.original_file_path : "null") + << ", row_count=" << (iceberg_params.__isset.row_count ? iceberg_params.row_count : -1) + << ", partition_spec_id=" + << (iceberg_params.__isset.partition_spec_id ? iceberg_params.partition_spec_id : 0) + << ", has_partition_data_json=" << iceberg_params.__isset.partition_data_json + << ", first_row_id=" + << (iceberg_params.__isset.first_row_id ? iceberg_params.first_row_id : -1) + << ", last_updated_sequence_number=" + << (iceberg_params.__isset.last_updated_sequence_number + ? iceberg_params.last_updated_sequence_number + : -1) + << ", delete_file_count=" + << (iceberg_params.__isset.delete_files ? iceberg_params.delete_files.size() : 0) + << ", delete_files=" + << (iceberg_params.__isset.delete_files + ? iceberg_delete_files_debug_string(iceberg_params.delete_files) + : "[]") + << ", has_serialized_split=" << iceberg_params.__isset.serialized_split << "}"; + return out.str(); +} + +IcebergTableReader::PositionDeleteRowsCollector::PositionDeleteRowsCollector( + std::string data_file_path, format::DeleteRows* rows) + : _data_file_path(std::move(data_file_path)), _rows(rows) {} + +Status IcebergTableReader::PositionDeleteRowsCollector::collect(const Block& block, + size_t read_rows) { + if (read_rows == 0) { + return Status::OK(); + } + const auto& file_path_column = assert_cast( + *remove_nullable((block.get_by_position(ICEBERG_FILE_PATH_BLOCK_POSITION).column))); + const auto& pos_column = assert_cast( + *remove_nullable(block.get_by_position(ICEBERG_ROW_POS_BLOCK_POSITION).column)); + for (size_t row = 0; row < read_rows; ++row) { + const auto file_path = file_path_column.get_data_at(row).to_string(); + if (file_path == _data_file_path) { + _rows->push_back(pos_column.get_element(row)); + } + } + return Status::OK(); +} + +Status IcebergTableReader::prepare_split(const format::SplitReadOptions& options) { + _row_lineage_columns = {}; + _iceberg_params.reset(); + _delete_predicates_initialized = false; + _position_delete_rows_storage.clear(); + _equality_delete_filters.clear(); + if (options.current_range.__isset.table_format_params && + options.current_range.table_format_params.__isset.iceberg_params) { + const auto& iceberg_params = options.current_range.table_format_params.iceberg_params; + _iceberg_params = iceberg_params; + if (iceberg_params.__isset.first_row_id) { + _row_lineage_columns.first_row_id = iceberg_params.first_row_id; + } + if (iceberg_params.__isset.last_updated_sequence_number) { + _row_lineage_columns.last_updated_sequence_number = + iceberg_params.last_updated_sequence_number; + } + } + RETURN_IF_ERROR(TableReader::prepare_split(options)); + if (_is_table_level_count_active()) { + return Status::OK(); + } + RETURN_IF_ERROR(_init_delete_predicates(options.current_range.table_format_params)); + return Status::OK(); +} + +std::string IcebergTableReader::debug_string() const { + size_t position_delete_file_count = 0; + size_t equality_delete_file_count = 0; + size_t deletion_vector_file_count = 0; + if (_iceberg_params.has_value() && _iceberg_params->__isset.delete_files) { + for (const auto& delete_file : _iceberg_params->delete_files) { + if (!delete_file.__isset.content) { + continue; + } + if (delete_file.content == POSITION_DELETE) { + ++position_delete_file_count; + } else if (delete_file.content == EQUALITY_DELETE) { + ++equality_delete_file_count; + } else if (delete_file.content == DELETION_VECTOR) { + ++deletion_vector_file_count; + } + } + } + + std::ostringstream equality_filters; + equality_filters << "["; + for (size_t idx = 0; idx < _equality_delete_filters.size(); ++idx) { + if (idx > 0) { + equality_filters << ", "; + } + const auto& filter = _equality_delete_filters[idx]; + equality_filters << "EqualityDeleteFilter{field_ids=" + << join_values_for_debug(filter.field_ids) << ", key_types=["; + for (size_t type_idx = 0; type_idx < filter.key_types.size(); ++type_idx) { + if (type_idx > 0) { + equality_filters << ", "; + } + equality_filters << (filter.key_types[type_idx] == nullptr + ? "null" + : filter.key_types[type_idx]->get_name()); + } + equality_filters << "], delete_block_rows=" << filter.delete_block.rows() + << ", delete_block_columns=" << filter.delete_block.columns() << "}"; + } + equality_filters << "]"; + + std::ostringstream out; + out << "IcebergTableReader{base=" << format::TableReader::debug_string() + << ", iceberg_params=" << iceberg_params_debug_string(_iceberg_params) + << ", row_lineage_first_row_id=" << _row_lineage_columns.first_row_id + << ", row_lineage_last_updated_sequence_number=" + << _row_lineage_columns.last_updated_sequence_number + << ", need_row_lineage_row_id=" << _need_row_lineage_row_id() + << ", need_iceberg_rowid=" << _need_iceberg_rowid() + << ", row_position_block_position=" << _row_position_block_position + << ", delete_predicates_initialized=" << _delete_predicates_initialized + << ", position_delete_file_count=" << position_delete_file_count + << ", equality_delete_file_count=" << equality_delete_file_count + << ", deletion_vector_file_count=" << deletion_vector_file_count + << ", position_delete_rows_storage_count=" << _position_delete_rows_storage.size() + << ", equality_delete_filter_count=" << _equality_delete_filters.size() + << ", equality_delete_filters=" << equality_filters.str() << "}"; + return out.str(); +} + +Status IcebergTableReader::materialize_virtual_columns(Block* table_block) { + for (size_t column_idx = 0; column_idx < _data_reader.column_mapper->mappings().size(); + ++column_idx) { + const auto& mapping = _data_reader.column_mapper->mappings()[column_idx]; + switch (mapping.virtual_column_type) { + case format::TableVirtualColumnType::ROW_ID: + RETURN_IF_ERROR(_materialize_row_lineage_row_id(table_block, column_idx)); + break; + case format::TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER: + RETURN_IF_ERROR( + _materialize_row_lineage_last_updated_sequence_number(table_block, column_idx)); + break; + case format::TableVirtualColumnType::ICEBERG_ROWID: + RETURN_IF_ERROR(_materialize_iceberg_rowid(table_block, column_idx)); + break; + case format::TableVirtualColumnType::INVALID: + break; + } + } + return Status::OK(); +} + +Status IcebergTableReader::customize_file_scan_request(format::FileScanRequest* file_request) { + RETURN_IF_ERROR(TableReader::customize_file_scan_request(file_request)); + if ((_row_lineage_columns.first_row_id >= 0 && _need_row_lineage_row_id()) || + _need_iceberg_rowid()) { + RETURN_IF_ERROR(_append_row_position_output_column(file_request)); + } + RETURN_IF_ERROR(_append_equality_delete_predicates(file_request)); + return Status::OK(); +} + +bool IcebergTableReader::_supports_aggregate_pushdown(TPushAggOp::type agg_type) const { + if (!TableReader::_supports_aggregate_pushdown(agg_type)) { + return false; + } + return _equality_delete_filters.empty(); +} + +Status IcebergTableReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, + DeleteFileDesc* desc, + bool* has_delete_file) { + DORIS_CHECK(desc != nullptr); + DORIS_CHECK(has_delete_file != nullptr); + *has_delete_file = false; + if (!t_desc.__isset.iceberg_params) { + return Status::OK(); + } + const auto& iceberg_params = t_desc.iceberg_params; + if (!iceberg_params.__isset.format_version || + iceberg_params.format_version < MIN_SUPPORT_DELETE_FILES_VERSION || + !iceberg_params.__isset.delete_files || iceberg_params.delete_files.empty()) { + return Status::OK(); + } + + const TIcebergDeleteFileDesc* deletion_vector = nullptr; + for (const auto& delete_file : iceberg_params.delete_files) { + if (!delete_file.__isset.content || delete_file.content != DELETION_VECTOR) { + continue; + } + if (deletion_vector != nullptr) { + return Status::DataQualityError("This iceberg data file has multiple DVs."); + } + deletion_vector = &delete_file; + } + if (deletion_vector == nullptr) { + return Status::OK(); + } + if (!deletion_vector->__isset.content_offset || + !deletion_vector->__isset.content_size_in_bytes) { + return Status::InternalError("Deletion vector is missing content offset or length"); + } + + desc->key = _iceberg_delete_vector_cache_key(*deletion_vector); + desc->path = deletion_vector->path; + desc->start_offset = deletion_vector->content_offset; + desc->size = deletion_vector->content_size_in_bytes; + desc->file_size = -1; + desc->format = DeleteFileDesc::Format::ICEBERG; + *has_delete_file = true; + return Status::OK(); +} + +Status IcebergTableReader::_init_delete_predicates(const TTableFormatFileDesc& t_desc) { + if (!t_desc.__isset.iceberg_params || _delete_predicates_initialized) { + _delete_predicates_initialized = true; + return Status::OK(); + } + const auto& iceberg_params = t_desc.iceberg_params; + if (!iceberg_params.__isset.format_version || + iceberg_params.format_version < MIN_SUPPORT_DELETE_FILES_VERSION || + !iceberg_params.__isset.delete_files || iceberg_params.delete_files.empty()) { + _delete_predicates_initialized = true; + return Status::OK(); + } + + std::vector position_delete_files; + std::vector equality_delete_files; + for (const auto& delete_file : iceberg_params.delete_files) { + if (!delete_file.__isset.content) { + continue; + } + if (delete_file.content == POSITION_DELETE) { + position_delete_files.push_back(delete_file); + } else if (delete_file.content == EQUALITY_DELETE) { + equality_delete_files.push_back(delete_file); + } + } + // `_delete_rows != nullptr` means DeleteVector is parsed + if (_delete_rows != nullptr) { + _position_delete_rows_storage = *_delete_rows; + _delete_rows = &_position_delete_rows_storage; + } + // Combine position delete rows from both deletion vector and position delete files, and + // initialize equality delete predicates. Position delete files contain row positions of + // deleted rows, which can be directly added to `_delete_rows`. Equality delete files contain + // values of deleted rows, which require reading the files and building predicates for later + // filtering. + if (!position_delete_files.empty()) { + RETURN_IF_ERROR(_init_position_delete_rows(position_delete_files)); + } + if (!equality_delete_files.empty()) { + RETURN_IF_ERROR(_init_equality_delete_predicates(equality_delete_files)); + } + + _delete_predicates_initialized = true; + return Status::OK(); +} + +std::string IcebergTableReader::_iceberg_delete_vector_cache_key( + const TIcebergDeleteFileDesc& delete_file) { + const std::string key_prefix = "iceberg_dv:"; + std::string key; + key.resize(key_prefix.size() + delete_file.path.size() + sizeof(delete_file.content_offset) + + sizeof(delete_file.content_size_in_bytes)); + char* data = key.data(); + memcpy(data, key_prefix.data(), key_prefix.size()); + data += key_prefix.size(); + memcpy(data, delete_file.path.data(), delete_file.path.size()); + data += delete_file.path.size(); + memcpy(data, &delete_file.content_offset, sizeof(delete_file.content_offset)); + data += sizeof(delete_file.content_offset); + memcpy(data, &delete_file.content_size_in_bytes, sizeof(delete_file.content_size_in_bytes)); + return key; +} + +std::shared_ptr IcebergTableReader::_delete_file_system_properties( + const TFileScanRangeParams& scan_params) { + auto system_properties = std::make_shared(); + system_properties->system_type = + scan_params.__isset.file_type ? scan_params.file_type : TFileType::FILE_LOCAL; + system_properties->properties = scan_params.properties; + system_properties->hdfs_params = scan_params.hdfs_params; + if (scan_params.__isset.broker_addresses) { + system_properties->broker_addresses.assign(scan_params.broker_addresses.begin(), + scan_params.broker_addresses.end()); + } + return system_properties; +} + +std::unique_ptr IcebergTableReader::_delete_file_description( + const TFileRangeDesc& range) { + auto file_description = std::make_unique(); + file_description->path = range.path; + file_description->file_size = range.__isset.file_size ? range.file_size : -1; + file_description->range_start_offset = range.__isset.start_offset ? range.start_offset : 0; + file_description->range_size = range.__isset.size ? range.size : -1; + if (range.__isset.fs_name) { + file_description->fs_name = range.fs_name; + } + return file_description; +} + +std::string IcebergTableReader::_data_file_path() const { + if (_iceberg_params.has_value() && _iceberg_params->__isset.original_file_path) { + return _iceberg_params->original_file_path; + } + DORIS_CHECK(_current_task != nullptr); + DORIS_CHECK(_current_task->data_file != nullptr); + return _current_task->data_file->path; +} + +Status IcebergTableReader::_append_row_position_output_column(format::FileScanRequest* request) { + const auto row_position_column_id = format::LocalColumnId(format::ROW_POSITION_COLUMN_ID); + _append_file_scan_column(request, row_position_column_id, &request->non_predicate_columns); + _row_position_block_position = request->local_positions.at(row_position_column_id).value(); + return Status::OK(); +} + +Status IcebergTableReader::_append_equality_delete_predicates(format::FileScanRequest* request) { + DORIS_CHECK(request != nullptr); + for (const auto& filter : _equality_delete_filters) { + auto delete_predicate = + std::make_shared(filter.delete_block, filter.field_ids); + DCHECK_EQ(filter.field_ids.size(), filter.key_types.size()); + for (size_t idx = 0; idx < filter.field_ids.size(); ++idx) { + const int field_id = filter.field_ids[idx]; + auto field_it = std::ranges::find_if( + _data_reader.file_schema, [field_id](const format::ColumnDefinition& field) { + return field.has_identifier_field_id() && + field.get_identifier_field_id() == field_id; + }); + if (field_it == _data_reader.file_schema.end()) { + return Status::InternalError( + "Can not find equality delete column field id {} in data file schema", + field_id); + } + const auto field_column_id = format::LocalColumnId(field_it->file_local_id()); + _append_file_scan_column(request, field_column_id, &request->predicate_columns); + const auto block_position = request->local_positions.at(field_column_id).value(); + auto slot = VSlotRef::create_shared(cast_set(block_position), + cast_set(block_position), -1, field_it->type, + field_it->name); + if (field_it->type->equals(*filter.key_types[idx])) { + delete_predicate->add_child(std::move(slot)); + } else { + auto cast_expr = Cast::create_shared(filter.key_types[idx]); + cast_expr->add_child(std::move(slot)); + delete_predicate->add_child(std::move(cast_expr)); + } + } + request->delete_conjuncts.push_back( + VExprContext::create_shared(std::move(delete_predicate))); + } + return Status::OK(); +} + +Status IcebergTableReader::_read_parquet_position_delete_file( + const TIcebergDeleteFileDesc& delete_file, const TFileScanRangeParams& scan_params, + IcebergDeleteFileIOContext* delete_io_ctx, PositionDeleteRowsCollector* collector) { + if (!delete_file.__isset.file_format) { + return Status::InternalError("Iceberg position delete file is missing file format"); + } + if (delete_file.file_format == TFileFormatType::FORMAT_ORC) { + return Status::NotSupported("Iceberg ORC position delete file is not supported"); + } + if (delete_file.file_format != TFileFormatType::FORMAT_PARQUET) { + return Status::NotSupported("Unsupported Iceberg delete file format {}", + delete_file.file_format); + } + + auto delete_range = build_iceberg_delete_file_range(delete_file.path); + if (_current_task != nullptr && _current_task->data_file != nullptr && + !_current_task->data_file->fs_name.empty()) { + delete_range.__set_fs_name(_current_task->data_file->fs_name); + } + auto system_properties = _delete_file_system_properties(scan_params); + auto file_description = _delete_file_description(delete_range); + std::shared_ptr io_ctx(&delete_io_ctx->io_ctx, [](io::IOContext*) {}); + format::parquet::ParquetReader reader(system_properties, file_description, io_ctx, + _scanner_profile); + RETURN_IF_ERROR(reader.init(_runtime_state)); + + std::vector schema; + RETURN_IF_ERROR(reader.get_schema(&schema)); + format::ColumnDefinition* file_path_field = nullptr; + format::ColumnDefinition* pos_field = nullptr; + for (auto& field : schema) { + if (field.name == ICEBERG_FILE_PATH) { + file_path_field = &field; + } else if (field.name == ICEBERG_ROW_POS) { + pos_field = &field; + } + } + if (file_path_field == nullptr || pos_field == nullptr) { + return Status::InternalError("Position delete parquet file is missing required columns"); + } + + auto request = std::make_shared(); + request->non_predicate_columns = { + format::LocalColumnIndex::top_level( + format::LocalColumnId(file_path_field->file_local_id())), + format::LocalColumnIndex::top_level(format::LocalColumnId(pos_field->file_local_id()))}; + request->local_positions = { + {format::LocalColumnId(file_path_field->file_local_id()), + format::LocalIndex(ICEBERG_FILE_PATH_BLOCK_POSITION)}, + {format::LocalColumnId(pos_field->file_local_id()), + format::LocalIndex(ICEBERG_ROW_POS_BLOCK_POSITION)}, + }; + RETURN_IF_ERROR(reader.open(request)); + + bool eof = false; + auto build_position_delete_block = [](const format::ColumnDefinition& file_path_field, + const format::ColumnDefinition& pos_field) -> Block { + Block block; + block.insert( + {file_path_field.type->create_column(), file_path_field.type, ICEBERG_FILE_PATH}); + block.insert({pos_field.type->create_column(), pos_field.type, ICEBERG_ROW_POS}); + return block; + }; + while (!eof) { + Block block = build_position_delete_block(*file_path_field, *pos_field); + size_t read_rows = 0; + RETURN_IF_ERROR(reader.get_block(&block, &read_rows, &eof)); + RETURN_IF_ERROR(collector->collect(block, read_rows)); + } + return reader.close(); +} + +Status IcebergTableReader::_init_position_delete_rows( + const std::vector& delete_files) { + TFileScanRangeParams delete_scan_params = + _scan_params == nullptr ? TFileScanRangeParams() : *_scan_params; + format::DeleteRows position_delete_rows; + IcebergDeleteFileIOContext delete_io_ctx(_runtime_state); + PositionDeleteRowsCollector collector(_data_file_path(), &position_delete_rows); + for (const auto& delete_file : delete_files) { + RETURN_IF_ERROR(_read_parquet_position_delete_file(delete_file, delete_scan_params, + &delete_io_ctx, &collector)); + } + if (position_delete_rows.empty()) { + return Status::OK(); + } + // Position delete files and deletion vectors both become row-position deletes for the + // common TableReader DeletePredicate path. Keep the merged rows in a member vector because + // DeletePredicate stores a reference to the vector used by _delete_rows. + _position_delete_rows_storage.insert(_position_delete_rows_storage.end(), + position_delete_rows.begin(), position_delete_rows.end()); + std::sort(_position_delete_rows_storage.begin(), _position_delete_rows_storage.end()); + _position_delete_rows_storage.erase( + std::unique(_position_delete_rows_storage.begin(), _position_delete_rows_storage.end()), + _position_delete_rows_storage.end()); + _delete_rows = &_position_delete_rows_storage; + return Status::OK(); +} + +Status IcebergTableReader::_init_equality_delete_predicates( + const std::vector& delete_files) { + TFileScanRangeParams delete_scan_params = + _scan_params == nullptr ? TFileScanRangeParams() : *_scan_params; + IcebergDeleteFileIOContext delete_io_ctx(_runtime_state); + for (const auto& delete_file : delete_files) { + RETURN_IF_ERROR(_read_parquet_equality_delete_file(delete_file, delete_scan_params, + &delete_io_ctx)); + } + return Status::OK(); +} + +Status IcebergTableReader::_read_parquet_equality_delete_file( + const TIcebergDeleteFileDesc& delete_file, const TFileScanRangeParams& scan_params, + IcebergDeleteFileIOContext* delete_io_ctx) { + if (!delete_file.__isset.file_format) { + return Status::InternalError("Iceberg equality delete file is missing file format"); + } + if (delete_file.file_format != TFileFormatType::FORMAT_PARQUET) { + return Status::NotSupported("Unsupported Iceberg equality delete file format {}", + delete_file.file_format); + } + if (!delete_file.__isset.field_ids || delete_file.field_ids.empty()) { + return Status::InternalError("Iceberg equality delete file is missing field ids"); + } + + auto delete_range = build_iceberg_delete_file_range(delete_file.path); + if (_current_task != nullptr && _current_task->data_file != nullptr && + !_current_task->data_file->fs_name.empty()) { + delete_range.__set_fs_name(_current_task->data_file->fs_name); + } + auto system_properties = _delete_file_system_properties(scan_params); + auto file_description = _delete_file_description(delete_range); + std::shared_ptr io_ctx(&delete_io_ctx->io_ctx, [](io::IOContext*) {}); + format::parquet::ParquetReader reader(system_properties, file_description, io_ctx, + _scanner_profile); + RETURN_IF_ERROR(reader.init(_runtime_state)); + + std::vector schema; + RETURN_IF_ERROR(reader.get_schema(&schema)); + std::vector delete_fields; + std::vector delete_field_ids; + std::vector delete_key_types; + for (const auto field_id : delete_file.field_ids) { + auto field_it = std::find_if(schema.begin(), schema.end(), + [field_id](const format::ColumnDefinition& field) { + return field.has_identifier_field_id() && + field_id == field.get_identifier_field_id(); + }); + if (field_it == schema.end()) { + return Status::InternalError("Can not find field id {} in equality delete file {}", + field_id, delete_file.path); + } + if (!field_it->children.empty()) { + return Status::NotSupported( + "Iceberg equality delete does not support complex column {}", field_it->name); + } + delete_fields.push_back(*field_it); + delete_field_ids.push_back(field_id); + delete_key_types.push_back(field_it->type); + } + + auto request = std::make_shared(); + for (size_t idx = 0; idx < delete_fields.size(); ++idx) { + const auto local_column_id = format::LocalColumnId(delete_fields[idx].file_local_id()); + request->non_predicate_columns.push_back( + format::LocalColumnIndex::top_level(local_column_id)); + request->local_positions.emplace(local_column_id, format::LocalIndex(idx)); + } + RETURN_IF_ERROR(reader.open(request)); + + auto build_equality_delete_block = + [](const std::vector fields) -> Block { + Block block; + for (const auto& field : fields) { + block.insert({field.type->create_column(), field.type, field.name}); + } + return block; + }; + Block delete_block = build_equality_delete_block(delete_fields); + MutableBlock mutable_delete_block(std::move(delete_block)); + bool eof = false; + while (!eof) { + Block block = build_equality_delete_block(delete_fields); + size_t read_rows = 0; + RETURN_IF_ERROR(reader.get_block(&block, &read_rows, &eof)); + if (read_rows > 0) { + RETURN_IF_ERROR(mutable_delete_block.merge(block)); + } + } + RETURN_IF_ERROR(reader.close()); + delete_block = mutable_delete_block.to_block(); + _equality_delete_filters.push_back( + EqualityDeleteFilter {.field_ids = std::move(delete_field_ids), + .key_types = std::move(delete_key_types), + .delete_block = std::move(delete_block)}); + return Status::OK(); +} + +Status IcebergTableReader::_materialize_row_lineage_row_id(Block* table_block, size_t column_idx) { + if (_row_lineage_columns.first_row_id < 0) { + return Status::OK(); + } + DORIS_CHECK(_row_position_block_position < _data_reader.block_template.columns()); + const auto& row_position_column = assert_cast( + *_data_reader.block_template.get_by_position(_row_position_block_position).column); + DORIS_CHECK(row_position_column.size() == table_block->rows()); + auto column = IColumn::mutate( + table_block->get_by_position(column_idx).column->convert_to_full_column_if_const()); + auto* nullable_column = assert_cast(column.get()); + auto& null_map = nullable_column->get_null_map_data(); + auto& data = assert_cast(*nullable_column->get_nested_column_ptr()).get_data(); + DORIS_CHECK(null_map.size() == row_position_column.size()); + DORIS_CHECK(data.size() == row_position_column.size()); + for (size_t row = 0; row < row_position_column.size(); ++row) { + if (null_map[row]) { + null_map[row] = 0; + data[row] = _row_lineage_columns.first_row_id + row_position_column.get_element(row); + } + } + table_block->replace_by_position(column_idx, std::move(column)); + return Status::OK(); +} + +Status IcebergTableReader::_materialize_iceberg_rowid(Block* table_block, size_t column_idx) { + DORIS_CHECK(_row_position_block_position < _data_reader.block_template.columns()); + const auto& row_position_column = assert_cast( + *_data_reader.block_template.get_by_position(_row_position_block_position).column); + DORIS_CHECK(row_position_column.size() == table_block->rows()); + + const auto& type = table_block->get_by_position(column_idx).type; + auto column = type->create_column(); + auto* nullable_column = check_and_get_column(column.get()); + auto* struct_column = nullable_column != nullptr + ? check_and_get_column( + nullable_column->get_nested_column_ptr().get()) + : check_and_get_column(column.get()); + DORIS_CHECK(struct_column != nullptr); + DORIS_CHECK(struct_column->tuple_size() >= 4); + + const auto rows = row_position_column.size(); + const auto file_path = _data_file_path(); + const int32_t partition_spec_id = + _iceberg_params.has_value() && _iceberg_params->__isset.partition_spec_id + ? _iceberg_params->partition_spec_id + : 0; + const std::string partition_data_json = + _iceberg_params.has_value() && _iceberg_params->__isset.partition_data_json + ? _iceberg_params->partition_data_json + : ""; + + auto& file_path_column = struct_column->get_column(0); + auto& row_pos_column = struct_column->get_column(1); + auto& spec_id_column = struct_column->get_column(2); + auto& partition_data_column = struct_column->get_column(3); + file_path_column.reserve(rows); + row_pos_column.reserve(rows); + spec_id_column.reserve(rows); + partition_data_column.reserve(rows); + for (size_t row = 0; row < rows; ++row) { + file_path_column.insert_data(file_path.data(), file_path.size()); + const int64_t row_pos = row_position_column.get_element(row); + row_pos_column.insert_data(reinterpret_cast(&row_pos), sizeof(row_pos)); + spec_id_column.insert_data(reinterpret_cast(&partition_spec_id), + sizeof(partition_spec_id)); + partition_data_column.insert_data(partition_data_json.data(), partition_data_json.size()); + } + if (nullable_column != nullptr) { + nullable_column->get_null_map_data().resize_fill(rows, 0); + } + table_block->replace_by_position(column_idx, std::move(column)); + return Status::OK(); +} + +Status IcebergTableReader::_materialize_row_lineage_last_updated_sequence_number( + Block* table_block, size_t column_idx) { + if (_row_lineage_columns.last_updated_sequence_number < 0) { + return Status::OK(); + } + auto column = IColumn::mutate( + table_block->get_by_position(column_idx).column->convert_to_full_column_if_const()); + auto* nullable_column = assert_cast(column.get()); + auto& null_map = nullable_column->get_null_map_data(); + auto& data = assert_cast(*nullable_column->get_nested_column_ptr()).get_data(); + DORIS_CHECK(null_map.size() == table_block->rows()); + DORIS_CHECK(data.size() == table_block->rows()); + for (size_t row = 0; row < table_block->rows(); ++row) { + if (null_map[row]) { + null_map[row] = 0; + data[row] = _row_lineage_columns.last_updated_sequence_number; + } + } + table_block->replace_by_position(column_idx, std::move(column)); + return Status::OK(); +} + +bool IcebergTableReader::_need_row_lineage_row_id() const { + if (_data_reader.column_mapper != nullptr) { + for (const auto& mapping : _data_reader.column_mapper->mappings()) { + if (mapping.virtual_column_type == format::TableVirtualColumnType::ROW_ID) { + return true; + } + } + } + return std::ranges::any_of(_projected_columns, is_projected_row_lineage_row_id); +} + +bool IcebergTableReader::_need_iceberg_rowid() const { + if (_data_reader.column_mapper != nullptr) { + for (const auto& mapping : _data_reader.column_mapper->mappings()) { + if (mapping.virtual_column_type == format::TableVirtualColumnType::ICEBERG_ROWID) { + return true; + } + } + } + return std::ranges::any_of(_projected_columns, is_projected_iceberg_rowid); +} + +} // namespace doris::format::iceberg diff --git a/be/src/format_v2/table/iceberg_reader.h b/be/src/format_v2/table/iceberg_reader.h new file mode 100644 index 00000000000000..1a2811ef968277 --- /dev/null +++ b/be/src/format_v2/table/iceberg_reader.h @@ -0,0 +1,175 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "common/status.h" +#include "core/block/block.h" +#include "format/table/iceberg_delete_file_reader_helper.h" +#include "format_v2/file_reader.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris { +class Block; +struct DeleteFileDesc; +namespace io { +struct FileDescription; +struct FileSystemProperties; +} // namespace io +} // namespace doris + +namespace doris::format::iceberg { + +// Iceberg table-level reader. +// It reuses TableReader for split orchestration, dynamic partition pruning and table-block +// finalization, while composing a FileReader for physical data-file reads instead of inheriting +// from a concrete file-format reader. +class IcebergTableReader : public format::TableReader { +public: + ~IcebergTableReader() override = default; + Status init(format::TableReadOptions&& options) override { + RETURN_IF_ERROR(format::TableReader::init(std::move(options))); + _mapper_options.mode = format::TableColumnMappingMode::BY_FIELD_ID; + return Status::OK(); + } + + Status prepare_split(const format::SplitReadOptions& options) override; + std::string debug_string() const override; + format::TableColumnMappingMode mapping_mode() const override { + return !_data_reader.file_schema.empty() && _has_field_id(_data_reader.file_schema) + ? format::TableColumnMappingMode::BY_FIELD_ID + : format::TableColumnMappingMode::BY_NAME; + } + +protected: + Status materialize_virtual_columns(Block* table_block) override; + + Status customize_file_scan_request(format::FileScanRequest* file_request) override; + + bool _supports_aggregate_pushdown(TPushAggOp::type agg_type) const override; + + Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc, + bool* has_delete_file) override; + + Status _init_delete_predicates(const TTableFormatFileDesc& t_desc); + +private: + bool _has_field_id(const std::vector& schema) const { + for (const auto& field : schema) { + // TopN lazy materialization asks the file reader to synthesize GLOBAL_ROWID in the + // first-phase scan. That virtual column is not an Iceberg data field and therefore has + // no Iceberg field id. Do not let it downgrade schema-evolution reads to BY_NAME, + // otherwise old data files whose physical names predate a rename (for example, + // table column `new_new_id` stored as file column `id`) are materialized as defaults. + if (field.column_type != format::ColumnType::DATA_COLUMN) { + continue; + } + if (!field.has_identifier_field_id()) { + return false; + } + if (!_has_field_id(field.children)) { + return false; + } + } + return true; + } + static constexpr int MIN_SUPPORT_DELETE_FILES_VERSION = 2; + static constexpr int POSITION_DELETE = 1; + static constexpr int EQUALITY_DELETE = 2; + static constexpr int DELETION_VECTOR = 3; + + struct RowLineageColumns { + int64_t first_row_id = -1; + int64_t last_updated_sequence_number = -1; + }; + + static constexpr const char* ICEBERG_FILE_PATH = "file_path"; + static constexpr const char* ICEBERG_ROW_POS = "pos"; + static constexpr size_t ICEBERG_FILE_PATH_BLOCK_POSITION = 0; + static constexpr size_t ICEBERG_ROW_POS_BLOCK_POSITION = 1; + + class PositionDeleteRowsCollector final { + public: + PositionDeleteRowsCollector(std::string data_file_path, format::DeleteRows* rows); + + Status collect(const Block& block, size_t read_rows); + + private: + std::string _data_file_path; + format::DeleteRows* _rows = nullptr; + }; + + static std::string _iceberg_delete_vector_cache_key(const TIcebergDeleteFileDesc& delete_file); + + static std::shared_ptr _delete_file_system_properties( + const TFileScanRangeParams& scan_params); + + static std::unique_ptr _delete_file_description( + const TFileRangeDesc& range); + + std::string _data_file_path() const; + + // Append row position column to file scan request for position delete handling. + Status _append_row_position_output_column(format::FileScanRequest* request); + // Append equality delete predicates to file scan request based on the delete files in iceberg + // params. DeleteVector and position delete files use the common DeleteRows path in TableReader. + Status _append_equality_delete_predicates(format::FileScanRequest* request); + + Status _init_equality_delete_predicates( + const std::vector& delete_files); + + // Read equality/position delete files. + Status _read_parquet_equality_delete_file(const TIcebergDeleteFileDesc& delete_file, + const TFileScanRangeParams& scan_params, + IcebergDeleteFileIOContext* delete_io_ctx); + Status _read_parquet_position_delete_file(const TIcebergDeleteFileDesc& delete_file, + const TFileScanRangeParams& scan_params, + IcebergDeleteFileIOContext* delete_io_ctx, + PositionDeleteRowsCollector* collector); + + // Read position delete files and collect deleted row positions to update DeletePredicate. + Status _init_position_delete_rows(const std::vector& delete_files); + + // Materialize row lineage virtual columns based on the position delete file. + Status _materialize_iceberg_rowid(Block* table_block, size_t column_idx); + Status _materialize_row_lineage_row_id(Block* table_block, size_t column_idx); + Status _materialize_row_lineage_last_updated_sequence_number(Block* table_block, + size_t column_idx); + + RowLineageColumns _row_lineage_columns; + size_t _row_position_block_position = 0; + std::optional _iceberg_params; + bool _delete_predicates_initialized = false; + format::DeleteRows _position_delete_rows_storage; + struct EqualityDeleteFilter { + std::vector field_ids; + std::vector key_types; + Block delete_block; + }; + std::vector _equality_delete_filters; + + bool _need_row_lineage_row_id() const; + bool _need_iceberg_rowid() const; +}; + +} // namespace doris::format::iceberg diff --git a/be/src/format_v2/table/paimon_reader.cpp b/be/src/format_v2/table/paimon_reader.cpp new file mode 100644 index 00000000000000..c82c99dd2854fa --- /dev/null +++ b/be/src/format_v2/table/paimon_reader.cpp @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/paimon_reader.h" + +#include + +#include +#include +#include + +#include "exprs/vexpr_context.h" +#include "format/table/deletion_vector_reader.h" +#include "format_v2/column_mapper.h" +#include "format_v2/jni/paimon_jni_reader.h" +#include "format_v2/table/schema_history_util.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format::paimon { + +Status PaimonReader::prepare_split(const format::SplitReadOptions& options) { + _split_schema_id = -1; + const auto& paimon_params = options.current_range.table_format_params.paimon_params; + if (paimon_params.__isset.schema_id) { + _split_schema_id = paimon_params.schema_id; + } + return format::TableReader::prepare_split(options); +} + +format::TableColumnMappingMode PaimonReader::mapping_mode() const { + return format::can_map_by_history_schema(_scan_params, _split_schema_id) + ? format::TableColumnMappingMode::BY_FIELD_ID + : format::TableColumnMappingMode::BY_NAME; +} + +Status PaimonReader::annotate_file_schema(std::vector* file_schema) { + DORIS_CHECK(file_schema != nullptr); + if (mapping_mode() != format::TableColumnMappingMode::BY_FIELD_ID) { + return Status::OK(); + } + return format::annotate_file_schema_from_history(_scan_params, _split_schema_id, file_schema); +} + +Status PaimonReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, + DeleteFileDesc* desc, bool* has_delete_file) { + DORIS_CHECK(desc != nullptr); + DORIS_CHECK(has_delete_file != nullptr); + *has_delete_file = false; + const auto& table_desc = t_desc.paimon_params; + if (!table_desc.__isset.deletion_file) { + return Status::OK(); + } + const auto& deletion_file = table_desc.deletion_file; + + const std::string key_prefix = "paimon_dv:"; + desc->key.resize(key_prefix.size() + deletion_file.path.size() + sizeof(deletion_file.offset)); + char* key_data = desc->key.data(); + memcpy(key_data, key_prefix.data(), key_prefix.size()); + key_data += key_prefix.size(); + memcpy(key_data, deletion_file.path.data(), deletion_file.path.size()); + key_data += deletion_file.path.size(); + memcpy(key_data, &deletion_file.offset, sizeof(deletion_file.offset)); + desc->path = deletion_file.path; + desc->start_offset = deletion_file.offset; + desc->size = deletion_file.length + 4; + desc->file_size = -1; + desc->format = DeleteFileDesc::Format::PAIMON; + *has_delete_file = true; + return Status::OK(); +} + +Status PaimonHybridReader::init(format::TableReadOptions&& options) { + return format::TableReader::init(std::move(options)); +} + +Status PaimonHybridReader::prepare_split(const format::SplitReadOptions& options) { + RETURN_IF_ERROR(_ensure_current_split_reader(options)); + DORIS_CHECK(_current_split_reader != nullptr); + return _current_split_reader->prepare_split(options); +} + +Status PaimonHybridReader::get_block(Block* block, bool* eos) { + DORIS_CHECK(_current_split_reader != nullptr); + return _current_split_reader->get_block(block, eos); +} + +Status PaimonHybridReader::close() { + Status close_status = Status::OK(); + if (_native_reader != nullptr) { + close_status = _native_reader->close(); + } + if (_jni_reader != nullptr) { + auto status = _jni_reader->close(); + if (!status.ok() && close_status.ok()) { + close_status = std::move(status); + } + } + _current_split_reader = nullptr; + return close_status; +} + +Status PaimonHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) { + if (_is_jni_split(options.current_range)) { + DCHECK(options.current_split_format == format::FileFormat::JNI); + if (_jni_reader == nullptr) { + _jni_reader = std::make_unique(); + RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI)); + } + _current_split_reader = _jni_reader.get(); + } else { + format::FileFormat file_format; + RETURN_IF_ERROR(_to_file_format(options.current_range, &file_format)); + DCHECK(options.current_split_format == file_format); + DCHECK(file_format == format::FileFormat::PARQUET || + file_format == format::FileFormat::ORC); + if (_native_reader == nullptr) { + _native_reader = format::paimon::PaimonReader::create_unique(); + RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), file_format)); + } + _current_split_reader = _native_reader.get(); + } + return Status::OK(); +} + +Status PaimonHybridReader::_init_child_reader(format::TableReader* reader, + format::FileFormat file_format) { + DORIS_CHECK(reader != nullptr); + VExprContextSPtrs conjuncts; + RETURN_IF_ERROR(_clone_conjuncts(&conjuncts)); + return reader->init({ + .projected_columns = _projected_columns, + .column_predicates = _table_column_predicates, + .conjuncts = std::move(conjuncts), + .format = file_format, + .scan_params = _scan_params, + .io_ctx = _io_ctx, + .runtime_state = _runtime_state, + .scanner_profile = _scanner_profile, + .push_down_agg_type = _push_down_agg_type, + .condition_cache_digest = _condition_cache_digest, + }); +} + +Status PaimonHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const { + DORIS_CHECK(conjuncts != nullptr); + conjuncts->clear(); + conjuncts->reserve(_conjuncts.size()); + for (const auto& conjunct : _conjuncts) { + VExprSPtr root; + RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root)); + conjuncts->push_back(VExprContext::create_shared(std::move(root))); + } + return Status::OK(); +} + +bool PaimonHybridReader::_is_jni_split(const TFileRangeDesc& range) { + return range.__isset.table_format_params && range.table_format_params.__isset.paimon_params && + range.table_format_params.paimon_params.__isset.reader_type && + range.table_format_params.paimon_params.reader_type == TPaimonReaderType::PAIMON_JNI; +} + +Status PaimonHybridReader::_to_file_format(const TFileRangeDesc& range, + format::FileFormat* file_format) { + DORIS_CHECK(file_format != nullptr); + const auto format_type = + range.__isset.format_type ? range.format_type : TFileFormatType::FORMAT_PARQUET; + switch (format_type) { + case TFileFormatType::FORMAT_PARQUET: + *file_format = format::FileFormat::PARQUET; + return Status::OK(); + case TFileFormatType::FORMAT_ORC: + *file_format = format::FileFormat::ORC; + return Status::OK(); + default: + return Status::NotSupported("Unsupported native Paimon file format {}", + to_string(format_type)); + } +} + +} // namespace doris::format::paimon diff --git a/be/src/format_v2/table/paimon_reader.h b/be/src/format_v2/table/paimon_reader.h new file mode 100644 index 00000000000000..200c4e885b5055 --- /dev/null +++ b/be/src/format_v2/table/paimon_reader.h @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "format_v2/table_reader.h" + +namespace doris { +struct DeleteFileDesc; +} +namespace doris::format::paimon { + +class PaimonReader final : public format::TableReader { +public: + ENABLE_FACTORY_CREATOR(PaimonReader); + ~PaimonReader() final = default; + Status prepare_split(const format::SplitReadOptions& options) override; + +#ifdef BE_TEST + void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; } + format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); } + Status TEST_annotate_file_schema(std::vector* file_schema) { + return annotate_file_schema(file_schema); + } +#endif + +protected: + format::TableColumnMappingMode mapping_mode() const override; + Status annotate_file_schema(std::vector* file_schema) override; + + Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc, + bool* has_delete_file) override; + +private: + int64_t _split_schema_id = -1; +}; + +// Paimon scans can contain both native data-file splits and serialized JNI splits in the same +// SplitSource. FileScannerV2 owns one table reader for the scanner lifetime, so this reader keeps +// native and JNI child readers internally and dispatches each split to the matching child reader. +class PaimonHybridReader final : public format::TableReader { +public: + ~PaimonHybridReader() override = default; + + Status init(format::TableReadOptions&& options) override; + Status prepare_split(const format::SplitReadOptions& options) override; + Status get_block(Block* block, bool* eos) override; + Status close() override; + +#ifdef BE_TEST + static bool TEST_is_jni_split(const TFileRangeDesc& range) { return _is_jni_split(range); } + static Status TEST_to_file_format(const TFileRangeDesc& range, + format::FileFormat* file_format) { + return _to_file_format(range, file_format); + } +#endif + +private: + Status _ensure_current_split_reader(const format::SplitReadOptions& options); + Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format); + Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const; + static bool _is_jni_split(const TFileRangeDesc& range); + static Status _to_file_format(const TFileRangeDesc& range, format::FileFormat* file_format); + + std::unique_ptr _native_reader; // handle parquet/orc native splits + std::unique_ptr _jni_reader; // handle serialized JNI splits + format::TableReader* _current_split_reader = nullptr; +}; + +} // namespace doris::format::paimon diff --git a/be/src/format_v2/table/remote_doris_reader.cpp b/be/src/format_v2/table/remote_doris_reader.cpp new file mode 100644 index 00000000000000..39580fd2561897 --- /dev/null +++ b/be/src/format_v2/table/remote_doris_reader.cpp @@ -0,0 +1,365 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/remote_doris_reader.h" + +#include +#include + +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" +#include "core/data_type_serde/data_type_serde.h" +#include "format/arrow/arrow_utils.h" +#include "format_v2/materialized_reader_util.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "util/timezone_utils.h" + +namespace doris::format::remote_doris { +namespace { + +Status validate_remote_doris_range(const TFileRangeDesc& range) { + if (!range.__isset.table_format_params || + range.table_format_params.table_format_type != "remote_doris") { + return Status::InvalidArgument("Remote Doris v2 reader requires remote_doris table format"); + } + if (!range.table_format_params.__isset.remote_doris_params) { + return Status::InvalidArgument("Remote Doris v2 reader requires remote_doris_params"); + } + const auto& params = range.table_format_params.remote_doris_params; + if (!params.__isset.location_uri || params.location_uri.empty()) { + return Status::InvalidArgument("Remote Doris v2 reader requires location_uri"); + } + if (!params.__isset.ticket || params.ticket.empty()) { + return Status::InvalidArgument("Remote Doris v2 reader requires ticket"); + } + return Status::OK(); +} + +class FlightRemoteDorisStream final : public RemoteDorisStream { +public: + explicit FlightRemoteDorisStream(const TFileRangeDesc& range) : _range(range) {} + + Status open() { + RETURN_IF_ERROR(validate_remote_doris_range(_range)); + const auto& params = _range.table_format_params.remote_doris_params; + arrow::flight::Location location; + RETURN_DORIS_STATUS_IF_ERROR( + arrow::flight::Location::Parse(params.location_uri).Value(&location)); + arrow::flight::Ticket ticket; + RETURN_DORIS_STATUS_IF_ERROR( + arrow::flight::Ticket::Deserialize(params.ticket).Value(&ticket)); + RETURN_DORIS_STATUS_IF_ERROR( + arrow::flight::FlightClient::Connect(location).Value(&_flight_client)); + RETURN_DORIS_STATUS_IF_ERROR(_flight_client->DoGet(ticket).Value(&_stream)); + return Status::OK(); + } + + Status next(std::shared_ptr* batch) override { + DORIS_CHECK(batch != nullptr); + arrow::flight::FlightStreamChunk chunk; + RETURN_DORIS_STATUS_IF_ERROR(_stream->Next().Value(&chunk)); + *batch = chunk.data; + return Status::OK(); + } + + Status close() override { + _stream.reset(); + if (_flight_client != nullptr) { + RETURN_DORIS_STATUS_IF_ERROR(_flight_client->Close()); + _flight_client.reset(); + } + return Status::OK(); + } + +private: + const TFileRangeDesc _range; + std::unique_ptr _flight_client; + std::unique_ptr _stream; +}; + +Status create_flight_stream(const TFileRangeDesc& range, std::unique_ptr* out) { + DORIS_CHECK(out != nullptr); + auto stream = std::make_unique(range); + RETURN_IF_ERROR(stream->open()); + *out = std::move(stream); + return Status::OK(); +} + +ColumnDefinition remote_doris_child_definition(const std::string& name, DataTypePtr type, + int32_t local_id); + +std::vector synthesize_remote_doris_children(const DataTypePtr& type) { + std::vector children; + DORIS_CHECK(type != nullptr); + const auto nested_type = remove_nullable(type); + switch (nested_type->get_primitive_type()) { + case TYPE_ARRAY: { + const auto* array_type = assert_cast(nested_type.get()); + children.push_back( + remote_doris_child_definition("element", array_type->get_nested_type(), 0)); + break; + } + case TYPE_MAP: { + const auto* map_type = assert_cast(nested_type.get()); + children.push_back(remote_doris_child_definition("key", map_type->get_key_type(), 0)); + children.push_back(remote_doris_child_definition("value", map_type->get_value_type(), 1)); + break; + } + case TYPE_STRUCT: { + const auto* struct_type = assert_cast(nested_type.get()); + children.reserve(struct_type->get_elements().size()); + for (size_t idx = 0; idx < struct_type->get_elements().size(); ++idx) { + children.push_back(remote_doris_child_definition(struct_type->get_element_name(idx), + struct_type->get_element(idx), + cast_set(idx))); + } + break; + } + default: + break; + } + return children; +} + +ColumnDefinition remote_doris_child_definition(const std::string& name, DataTypePtr type, + int32_t local_id) { + ColumnDefinition child; + child.identifier = Field::create_field(name); + child.local_id = local_id; + child.name = name; + child.type = std::move(type); + child.children = synthesize_remote_doris_children(child.type); + return child; +} + +} // namespace + +RemoteDorisFileReader::RemoteDorisFileReader( + std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, const TFileRangeDesc& range, + const std::vector& file_slot_descs, + RemoteDorisStreamFactory stream_factory) + : FileReader(system_properties, file_description, std::move(io_ctx), profile), + _range(range), + _file_slot_descs(file_slot_descs), + _stream_factory(std::move(stream_factory)) { + TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, _ctz); +} + +RemoteDorisFileReader::~RemoteDorisFileReader() { + static_cast(close()); +} + +Status RemoteDorisFileReader::init(RuntimeState* state) { + (void)state; + RETURN_IF_ERROR(validate_remote_doris_range(_range)); + RETURN_IF_ERROR(_build_col_name_to_file_id()); + _eof = false; + return Status::OK(); +} + +Status RemoteDorisFileReader::get_schema(std::vector* file_schema) const { + DORIS_CHECK(file_schema != nullptr); + file_schema->clear(); + file_schema->reserve(_file_slot_descs.size()); + for (size_t idx = 0; idx < _file_slot_descs.size(); ++idx) { + const auto* slot = _file_slot_descs[idx]; + DORIS_CHECK(slot != nullptr); + file_schema->push_back({ + .identifier = Field::create_field(cast_set(idx)), + .local_id = cast_set(idx), + .name = slot->col_name(), + .type = slot->type(), + // Remote Doris exposes table slots as file columns. Complex columns still need + // structural children so TableColumnMapper can validate and project them. + .children = synthesize_remote_doris_children(slot->type()), + }); + } + return Status::OK(); +} + +Status RemoteDorisFileReader::open(std::shared_ptr request) { + RETURN_IF_ERROR(FileReader::open(std::move(request))); + RETURN_IF_ERROR(_open_stream()); + _eof = false; + return Status::OK(); +} + +Status RemoteDorisFileReader::get_block(Block* file_block, size_t* rows, bool* eof) { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(eof != nullptr); + if (_stream == nullptr) { + return Status::InternalError("Remote Doris v2 reader is not open"); + } + + *rows = 0; + *eof = false; + std::shared_ptr batch; + RETURN_IF_ERROR(_stream->next(&batch)); + if (batch == nullptr) { + *eof = true; + _eof = true; + return Status::OK(); + } + + RETURN_IF_ERROR(_materialize_record_batch(*batch, file_block, rows)); + RETURN_IF_ERROR( + apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, rows)); + _reader_statistics.read_rows += *rows; + return Status::OK(); +} + +Status RemoteDorisFileReader::close() { + if (_stream != nullptr) { + RETURN_IF_ERROR(_stream->close()); + _stream.reset(); + } + _request.reset(); + _eof = true; + return Status::OK(); +} + +Status RemoteDorisFileReader::_open_stream() { + DORIS_CHECK(_stream == nullptr); + if (_stream_factory) { + RETURN_IF_ERROR(_stream_factory(_range, &_stream)); + } else { + RETURN_IF_ERROR(create_flight_stream(_range, &_stream)); + } + DORIS_CHECK(_stream != nullptr); + return Status::OK(); +} + +Status RemoteDorisFileReader::_materialize_record_batch(const arrow::RecordBatch& batch, + Block* file_block, size_t* rows) const { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + if (_request == nullptr) { + return Status::InternalError("Remote Doris v2 reader is not open"); + } + + std::vector materialized_columns(file_block->columns(), false); + for (int arrow_idx = 0; arrow_idx < batch.num_columns(); ++arrow_idx) { + const std::string& column_name = batch.schema()->field(arrow_idx)->name(); + const auto file_id_it = _col_name_to_file_id.find(column_name); + if (file_id_it == _col_name_to_file_id.end()) { + return Status::InternalError("Remote Doris returned unknown column {}", column_name); + } + const auto block_position_it = _request->local_positions.find(file_id_it->second); + if (block_position_it == _request->local_positions.end()) { + continue; + } + RETURN_IF_ERROR(_materialize_arrow_column(batch, arrow_idx, file_id_it->second, + block_position_it->second, file_block)); + materialized_columns[block_position_it->second.value()] = true; + } + + for (const auto& [file_column_id, block_position] : _request->local_positions) { + if (block_position.value() >= materialized_columns.size()) { + return Status::InternalError( + "Remote Doris requested block position {} out of range, block columns {}", + block_position.value(), materialized_columns.size()); + } + if (!materialized_columns[block_position.value()]) { + return Status::InternalError("Remote Doris did not return requested file column id {}", + file_column_id.value()); + } + } + + *rows = cast_set(batch.num_rows()); + return Status::OK(); +} + +Status RemoteDorisFileReader::_materialize_arrow_column(const arrow::RecordBatch& batch, + int arrow_column_idx, + LocalColumnId file_column_id, + const LocalIndex& block_position, + Block* file_block) const { + DORIS_CHECK(file_block != nullptr); + if (block_position.value() >= file_block->columns()) { + return Status::InternalError( + "Remote Doris block position {} out of range, block columns {}", + block_position.value(), file_block->columns()); + } + const auto column_name = batch.schema()->field(arrow_column_idx)->name(); + auto columns_guard = file_block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); + try { + RETURN_IF_ERROR(columns_guard.get_datatype_by_position(block_position.value()) + ->get_serde() + ->read_column_from_arrow(*columns[block_position.value()], + batch.column(arrow_column_idx).get(), 0, + batch.num_rows(), _ctz)); + } catch (const Exception& e) { + return Status::InternalError( + "Failed to convert Remote Doris Arrow column '{}' (file_column_id={}) to Doris " + "block: {}", + column_name, file_column_id.value(), e.what()); + } + return Status::OK(); +} + +Status RemoteDorisFileReader::_build_col_name_to_file_id() { + _col_name_to_file_id.clear(); + _col_name_to_file_id.reserve(_file_slot_descs.size()); + for (size_t idx = 0; idx < _file_slot_descs.size(); ++idx) { + const auto* slot = _file_slot_descs[idx]; + DORIS_CHECK(slot != nullptr); + _col_name_to_file_id.emplace(slot->col_name(), LocalColumnId(cast_set(idx))); + } + return Status::OK(); +} + +RemoteDorisReader::RemoteDorisReader(RemoteDorisStreamFactory stream_factory) + : _stream_factory(std::move(stream_factory)) {} + +Status RemoteDorisReader::init(TableReadOptions&& options) { + if (options.file_slot_descs == nullptr) { + return Status::InvalidArgument("Remote Doris v2 reader requires file slot descriptors"); + } + return TableReader::init(std::move(options)); +} + +Status RemoteDorisReader::prepare_split(const SplitReadOptions& options) { + RETURN_IF_ERROR(validate_remote_doris_range(options.current_range)); + return TableReader::prepare_split(options); +} + +Status RemoteDorisReader::create_file_reader(std::unique_ptr* reader) { + DORIS_CHECK(reader != nullptr); + DORIS_CHECK(_file_slot_descs != nullptr); + *reader = std::make_unique( + _system_properties, _current_task->data_file, _io_ctx, _scanner_profile, + _current_file_range_desc, *_file_slot_descs, _stream_factory); + return Status::OK(); +} + +} // namespace doris::format::remote_doris diff --git a/be/src/format_v2/table/remote_doris_reader.h b/be/src/format_v2/table/remote_doris_reader.h new file mode 100644 index 00000000000000..b4dd2a505a95ad --- /dev/null +++ b/be/src/format_v2/table/remote_doris_reader.h @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "format_v2/file_reader.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris { +class Block; +class RuntimeProfile; +class RuntimeState; +class SlotDescriptor; +} // namespace doris + +namespace doris::format::remote_doris { + +// Small abstraction around Arrow Flight to keep Remote Doris v2 reader unit-testable without +// starting a Flight server. Production code uses FlightRemoteDorisStream; tests can provide +// RecordBatch-backed streams that exercise the same FileReader block materialization path. +class RemoteDorisStream { +public: + virtual ~RemoteDorisStream() = default; + virtual Status next(std::shared_ptr* batch) = 0; + virtual Status close() = 0; +}; + +using RemoteDorisStreamFactory = + std::function*)>; + +class RemoteDorisFileReader final : public FileReader { +public: + RemoteDorisFileReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx, RuntimeProfile* profile, + const TFileRangeDesc& range, + const std::vector& file_slot_descs, + RemoteDorisStreamFactory stream_factory = {}); + ~RemoteDorisFileReader() override; + + Status init(RuntimeState* state) override; + Status get_schema(std::vector* file_schema) const override; + Status open(std::shared_ptr request) override; + Status get_block(Block* file_block, size_t* rows, bool* eof) override; + Status close() override; + +private: + Status _open_stream(); + Status _materialize_record_batch(const arrow::RecordBatch& batch, Block* file_block, + size_t* rows) const; + Status _materialize_arrow_column(const arrow::RecordBatch& batch, int arrow_column_idx, + LocalColumnId file_column_id, const LocalIndex& block_position, + Block* file_block) const; + Status _build_col_name_to_file_id(); + + const TFileRangeDesc _range; + const std::vector _file_slot_descs; + RemoteDorisStreamFactory _stream_factory; + cctz::time_zone _ctz; + std::unique_ptr _stream; + std::unordered_map _col_name_to_file_id; +}; + +class RemoteDorisReader final : public TableReader { +public: + explicit RemoteDorisReader(RemoteDorisStreamFactory stream_factory = {}); + + Status init(TableReadOptions&& options) override; + Status prepare_split(const SplitReadOptions& options) override; + +protected: + Status create_file_reader(std::unique_ptr* reader) override; + +private: + RemoteDorisStreamFactory _stream_factory; +}; + +} // namespace doris::format::remote_doris diff --git a/be/src/format_v2/table/schema_history_util.cpp b/be/src/format_v2/table/schema_history_util.cpp new file mode 100644 index 00000000000000..10109839e6987d --- /dev/null +++ b/be/src/format_v2/table/schema_history_util.cpp @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/schema_history_util.h" + +#include +#include +#include + +#include "core/field.h" +#include "util/string_util.h" + +namespace doris::format { +namespace { + +const schema::external::TField* get_field_ptr(const schema::external::TFieldPtr& field_ptr) { + if (!field_ptr.__isset.field_ptr || field_ptr.field_ptr == nullptr) { + return nullptr; + } + return field_ptr.field_ptr.get(); +} + +const schema::external::TField* find_child_field_by_name( + const std::vector& fields, const std::string& name) { + for (const auto& field_ptr : fields) { + const auto* field = get_field_ptr(field_ptr); + if (field == nullptr) { + continue; + } + if (field->__isset.name && to_lower(field->name) == to_lower(name)) { + return field; + } + if (field->__isset.name_mapping && + std::ranges::any_of(field->name_mapping, [&](const std::string& alias) { + return to_lower(alias) == to_lower(name); + })) { + return field; + } + } + return nullptr; +} + +void annotate_column_from_field(ColumnDefinition* column, const schema::external::TField& field); + +void annotate_struct_children(ColumnDefinition* column, + const schema::external::TStructField& struct_field) { + DORIS_CHECK(column != nullptr); + if (!struct_field.__isset.fields) { + return; + } + for (auto& child : column->children) { + const auto* child_field = find_child_field_by_name(struct_field.fields, child.name); + if (child_field != nullptr) { + annotate_column_from_field(&child, *child_field); + } + } +} + +void annotate_column_from_field(ColumnDefinition* column, const schema::external::TField& field) { + DORIS_CHECK(column != nullptr); + if (field.__isset.id) { + column->identifier = Field::create_field(field.id); + } + column->name_mapping = + field.__isset.name_mapping ? field.name_mapping : std::vector {}; + if (!field.__isset.nestedField) { + return; + } + if (field.nestedField.__isset.struct_field) { + annotate_struct_children(column, field.nestedField.struct_field); + } else if (field.nestedField.__isset.array_field) { + if (column->children.empty() || !field.nestedField.array_field.__isset.item_field) { + return; + } + const auto* item_field = get_field_ptr(field.nestedField.array_field.item_field); + if (item_field != nullptr) { + annotate_column_from_field(&column->children.front(), *item_field); + } + } else if (field.nestedField.__isset.map_field) { + if (!column->children.empty() && field.nestedField.map_field.__isset.key_field) { + const auto* key_field = get_field_ptr(field.nestedField.map_field.key_field); + if (key_field != nullptr) { + annotate_column_from_field(&column->children.front(), *key_field); + } + } + if (column->children.size() > 1 && field.nestedField.map_field.__isset.value_field) { + const auto* value_field = get_field_ptr(field.nestedField.map_field.value_field); + if (value_field != nullptr) { + annotate_column_from_field(&column->children[1], *value_field); + } + } + } +} + +} // namespace + +const schema::external::TSchema* find_history_schema(const TFileScanRangeParams* params, + int64_t schema_id) { + if (params == nullptr || !params->__isset.history_schema_info) { + return nullptr; + } + for (const auto& schema : params->history_schema_info) { + if (schema.__isset.schema_id && schema.schema_id == schema_id) { + return &schema; + } + } + return nullptr; +} + +bool can_map_by_history_schema(const TFileScanRangeParams* params, int64_t split_schema_id) { + if (split_schema_id < 0 || params == nullptr || !params->__isset.current_schema_id || + !params->__isset.history_schema_info) { + return false; + } + return find_history_schema(params, split_schema_id) != nullptr; +} + +Status annotate_file_schema_from_history(const TFileScanRangeParams* params, + int64_t split_schema_id, + std::vector* file_schema) { + DORIS_CHECK(file_schema != nullptr); + const auto* schema = find_history_schema(params, split_schema_id); + DORIS_CHECK(schema != nullptr); + if (!schema->__isset.root_field || !schema->root_field.__isset.fields) { + return Status::OK(); + } + for (auto& column : *file_schema) { + const auto* field = find_child_field_by_name(schema->root_field.fields, column.name); + if (field != nullptr) { + annotate_column_from_field(&column, *field); + } + } + return Status::OK(); +} + +} // namespace doris::format diff --git a/be/src/format_v2/table/schema_history_util.h b/be/src/format_v2/table/schema_history_util.h new file mode 100644 index 00000000000000..3c4a80b5d4c975 --- /dev/null +++ b/be/src/format_v2/table/schema_history_util.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "common/status.h" +#include "format_v2/column_data.h" +#include "gen_cpp/ExternalTableSchema_types.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format { + +const schema::external::TSchema* find_history_schema(const TFileScanRangeParams* params, + int64_t schema_id); + +bool can_map_by_history_schema(const TFileScanRangeParams* params, int64_t split_schema_id); + +// Annotate a file-local schema with the field ids and name mappings from the historical table +// schema that describes the current split. TableReader has already annotated projected table +// columns from current_schema_id; this function performs the symmetric annotation for the file +// schema so TableColumnMapper can match evolved Hudi/Paimon files by field id. +Status annotate_file_schema_from_history(const TFileScanRangeParams* params, + int64_t split_schema_id, + std::vector* file_schema); + +} // namespace doris::format diff --git a/be/src/format_v2/table_reader.cpp b/be/src/format_v2/table_reader.cpp new file mode 100644 index 00000000000000..87d222c052f0a9 --- /dev/null +++ b/be/src/format_v2/table_reader.cpp @@ -0,0 +1,847 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table_reader.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_struct.h" +#include "exec/common/endian.h" +#include "exprs/vexpr_context.h" +#include "exprs/vslot_ref.h" +#include "format/table/deletion_vector_reader.h" +#include "format_v2/column_mapper.h" +#include "format_v2/delimited_text/csv_reader.h" +#include "format_v2/delimited_text/text_reader.h" +#include "format_v2/json/json_reader.h" +#include "format_v2/native/native_reader.h" +#include "format_v2/parquet/parquet_reader.h" +#include "roaring/roaring64map.hh" +#include "storage/segment/condition_cache.h" +#include "util/string_util.h" + +namespace doris::format { +namespace { + +template +std::string join_table_reader_debug_strings(const std::vector& values, Formatter formatter) { + std::ostringstream out; + out << "["; + for (size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + out << ", "; + } + out << formatter(values[i]); + } + out << "]"; + return out.str(); +} + +std::string file_format_to_string(FileFormat format) { + switch (format) { + case FileFormat::PARQUET: + return "PARQUET"; + case FileFormat::ORC: + return "ORC"; + case FileFormat::CSV: + return "CSV"; + case FileFormat::JSON: + return "JSON"; + case FileFormat::TEXT: + return "TEXT"; + case FileFormat::JNI: + return "JNI"; + case FileFormat::NATIVE: + return "NATIVE"; + case FileFormat::ARROW: + return "ARROW"; + } + return "UNKNOWN"; +} + +std::string push_down_agg_to_string(TPushAggOp::type op) { + switch (op) { + case TPushAggOp::NONE: + return "NONE"; + case TPushAggOp::COUNT: + return "COUNT"; + case TPushAggOp::MINMAX: + return "MINMAX"; + case TPushAggOp::MIX: + return "MIX"; + case TPushAggOp::COUNT_ON_INDEX: + return "COUNT_ON_INDEX"; + } + return "UNKNOWN"; +} + +std::string current_file_debug_string(const std::unique_ptr& task) { + if (task == nullptr || task->data_file == nullptr) { + return "null"; + } + const auto& file = *task->data_file; + std::ostringstream out; + out << "FileDescription{path=" << file.path << ", file_size=" << file.file_size + << ", range_start_offset=" << file.range_start_offset << ", range_size=" << file.range_size + << ", mtime=" << file.mtime << ", fs_name=" << file.fs_name + << ", file_cache_admission=" << file.file_cache_admission << "}"; + return out.str(); +} + +std::string partition_values_debug_string(const std::map& partition_values) { + std::ostringstream out; + out << "{"; + size_t idx = 0; + for (const auto& [key, _] : partition_values) { + if (idx++ > 0) { + out << ", "; + } + out << key; + } + out << "}"; + return out.str(); +} + +const schema::external::TField* get_field_ptr(const schema::external::TFieldPtr& field_ptr) { + if (!field_ptr.__isset.field_ptr || field_ptr.field_ptr == nullptr) { + return nullptr; + } + return field_ptr.field_ptr.get(); +} + +bool external_field_matches_name(const schema::external::TField& field, const std::string& name) { + if (field.__isset.name && to_lower(field.name) == to_lower(name)) { + return true; + } + return field.__isset.name_mapping && + std::ranges::any_of(field.name_mapping, [&](const std::string& alias) { + return to_lower(alias) == to_lower(name); + }); +} + +DataTypePtr find_struct_child_type_by_external_field(const DataTypeStruct& struct_type, + const schema::external::TField& field) { + for (size_t field_idx = 0; field_idx < struct_type.get_elements().size(); ++field_idx) { + if (external_field_matches_name(field, struct_type.get_element_name(field_idx))) { + return struct_type.get_element(field_idx); + } + } + return nullptr; +} + +ColumnDefinition build_schema_column_from_external_field(const schema::external::TField& field, + DataTypePtr type) { + ColumnDefinition column { + .identifier = field.__isset.id ? Field::create_field(field.id) : Field {}, + .name = field.__isset.name ? field.name : "", + .name_mapping = + field.__isset.name_mapping ? field.name_mapping : std::vector {}, + .type = std::move(type), + .children = {}, + .default_expr = nullptr, + .is_partition_key = false, + }; + if (column.type == nullptr || !field.__isset.nestedField) { + return column; + } + + const auto nested_type = remove_nullable(column.type); + switch (nested_type->get_primitive_type()) { + case TYPE_STRUCT: { + if (!field.nestedField.__isset.struct_field || + !field.nestedField.struct_field.__isset.fields) { + return column; + } + const auto& struct_type = assert_cast(*nested_type); + for (const auto& child_ptr : field.nestedField.struct_field.fields) { + const auto* child_field = get_field_ptr(child_ptr); + if (child_field == nullptr || !child_field->__isset.name) { + continue; + } + auto child_type = find_struct_child_type_by_external_field(struct_type, *child_field); + if (child_type == nullptr) { + continue; + } + column.children.push_back( + build_schema_column_from_external_field(*child_field, child_type)); + } + break; + } + case TYPE_ARRAY: { + if (!field.nestedField.__isset.array_field || + !field.nestedField.array_field.__isset.item_field) { + return column; + } + const auto* item_field = get_field_ptr(field.nestedField.array_field.item_field); + if (item_field == nullptr) { + return column; + } + const auto& array_type = assert_cast(*nested_type); + auto child = + build_schema_column_from_external_field(*item_field, array_type.get_nested_type()); + child.name = "element"; + if (child.has_identifier_name()) { + child.identifier = Field::create_field(child.name); + } + column.children.push_back(std::move(child)); + break; + } + case TYPE_MAP: { + if (!field.nestedField.__isset.map_field || + !field.nestedField.map_field.__isset.key_field || + !field.nestedField.map_field.__isset.value_field) { + return column; + } + const auto& map_type = assert_cast(*nested_type); + const auto* key_field = get_field_ptr(field.nestedField.map_field.key_field); + if (key_field != nullptr) { + auto child = + build_schema_column_from_external_field(*key_field, map_type.get_key_type()); + child.name = "key"; + if (child.has_identifier_name()) { + child.identifier = Field::create_field(child.name); + } + column.children.push_back(std::move(child)); + } + const auto* value_field = get_field_ptr(field.nestedField.map_field.value_field); + if (value_field != nullptr) { + auto child = build_schema_column_from_external_field(*value_field, + map_type.get_value_type()); + child.name = "value"; + if (child.has_identifier_name()) { + child.identifier = Field::create_field(child.name); + } + column.children.push_back(std::move(child)); + } + break; + } + default: + break; + } + return column; +} + +const schema::external::TField* find_external_root_field(const TFileScanRangeParams* params, + const ColumnDefinition& column) { + if (params == nullptr || !params->__isset.history_schema_info || + params->history_schema_info.empty()) { + return nullptr; + } + const auto* schema = ¶ms->history_schema_info.front(); + if (params->__isset.current_schema_id) { + for (const auto& candidate_schema : params->history_schema_info) { + if (candidate_schema.__isset.schema_id && + candidate_schema.schema_id == params->current_schema_id) { + schema = &candidate_schema; + break; + } + } + } + if (!schema->__isset.root_field || !schema->root_field.__isset.fields) { + return nullptr; + } + for (const auto& field_ptr : schema->root_field.fields) { + const auto* field = get_field_ptr(field_ptr); + if (field == nullptr) { + continue; + } + if (external_field_matches_name(*field, column.name)) { + return field; + } + } + return nullptr; +} + +std::string expr_context_debug_string(const VExprContextSPtr& context) { + if (context == nullptr) { + return "null"; + } + const auto root = context->root(); + if (root == nullptr) { + return "VExprContext{root=null}"; + } + std::ostringstream out; + out << "VExprContext{root_name=" << root->expr_name() << ", root_debug=" << root->debug_string() + << "}"; + return out.str(); +} + +std::string table_filter_debug_string(const TableFilter& filter) { + std::ostringstream out; + out << "TableFilter{conjunct=" << expr_context_debug_string(filter.conjunct) + << ", global_indices=" + << join_table_reader_debug_strings( + filter.global_indices, + [](GlobalIndex global_index) { return std::to_string(global_index.value()); }) + << "}"; + return out.str(); +} + +std::string table_column_predicates_debug_string(const TableColumnPredicates& predicates) { + std::ostringstream out; + out << "{"; + size_t idx = 0; + for (const auto& [global_index, column_predicates] : predicates) { + if (idx++ > 0) { + out << ", "; + } + out << global_index.value() << ":{predicate_count=" << column_predicates.size() << "}"; + } + out << "}"; + return out.str(); +} + +bool contains_runtime_filter(const VExprContextSPtrs& conjuncts) { + return std::ranges::any_of(conjuncts, [](const auto& conjunct) { + return conjunct != nullptr && conjunct->root() != nullptr && + conjunct->root()->is_rf_wrapper(); + }); +} + +void collect_global_indices(const VExprSPtr& expr, std::set* global_indices) { + if (expr == nullptr) { + return; + } + if (expr->is_rf_wrapper()) { + // RuntimeFilterExpr wraps a real predicate expression but its own thrift node can still + // look like SLOT_REF. Collect indices from the wrapped predicate; do not cast the wrapper + // itself to VSlotRef. + collect_global_indices(expr->get_impl(), global_indices); + return; + } + if (expr->is_slot_ref()) { + const auto* slot_ref = assert_cast(expr.get()); + DORIS_CHECK(slot_ref->column_id() >= 0); + global_indices->insert(GlobalIndex(cast_set(slot_ref->column_id()))); + } + for (const auto& child : expr->children()) { + collect_global_indices(child, global_indices); + } +} + +Status build_table_filters_from_conjunct(const VExprContextSPtr& conjunct, RuntimeState* state, + std::vector* table_filters) { + if (conjunct == nullptr) { + return Status::OK(); + } + std::set global_indices; + collect_global_indices(conjunct->root(), &global_indices); + if (!global_indices.empty()) { + TableFilter table_filter; + VExprSPtr filter_root; + RETURN_IF_ERROR(clone_table_expr_tree(conjunct->root(), &filter_root)); + table_filter.conjunct = VExprContext::create_shared(std::move(filter_root)); + for (const auto global_index : global_indices) { + table_filter.global_indices.push_back(global_index); + } + table_filters->push_back(std::move(table_filter)); + } + return Status::OK(); +} + +Status parse_deletion_vector(const char* buf, size_t buffer_size, DeleteFileDesc::Format format, + DeleteRows* delete_rows) { + DORIS_CHECK(buf != nullptr); + DORIS_CHECK(delete_rows != nullptr); + DORIS_CHECK(format == DeleteFileDesc::Format::PAIMON || + format == DeleteFileDesc::Format::ICEBERG); + + const size_t checksum_size = format == DeleteFileDesc::Format::ICEBERG ? 4 : 0; + if (buffer_size < 8 + checksum_size) [[unlikely]] { + return Status::DataQualityError("Deletion vector file size too small: {}", buffer_size); + } + + auto total_length = BigEndian::Load32(buf); + if (total_length + 4 + checksum_size != buffer_size) [[unlikely]] { + return Status::DataQualityError("Deletion vector length mismatch, expected: {}, actual: {}", + total_length + 4 + checksum_size, buffer_size); + } + + const char* bitmap_buf = buf + 8; + const size_t bitmap_size = buffer_size - 8 - checksum_size; + if (format == DeleteFileDesc::Format::PAIMON) { + // Paimon BitmapDeletionVector stores: + // [4-byte big-endian length][4-byte magic 0x5E43F2D0][32-bit roaring bitmap] + // The length covers magic + bitmap, and does not include the leading length field. + constexpr static char PAIMON_BITMAP_MAGIC[] = {'\x5E', '\x43', '\xF2', '\xD0'}; + if (memcmp(buf + sizeof(total_length), PAIMON_BITMAP_MAGIC, 4) != 0) [[unlikely]] { + return Status::DataQualityError( + "Paimon deletion vector magic number mismatch, expected: {}, actual: {}", + BigEndian::Load32(PAIMON_BITMAP_MAGIC), + BigEndian::Load32(buf + sizeof(total_length))); + } + + roaring::Roaring bitmap; + try { + bitmap = roaring::Roaring::readSafe(bitmap_buf, bitmap_size); + } catch (const std::runtime_error& e) { + return Status::DataQualityError("Decode roaring bitmap failed, {}", e.what()); + } + + delete_rows->reserve(bitmap.cardinality()); + for (auto it = bitmap.begin(); it != bitmap.end(); it++) { + delete_rows->push_back(*it); + } + return Status::OK(); + } + + constexpr static char ICEBERG_DV_MAGIC[] = {'\xD1', '\xD3', '\x39', '\x64'}; + if (memcmp(buf + sizeof(total_length), ICEBERG_DV_MAGIC, 4) != 0) [[unlikely]] { + return Status::DataQualityError( + "Iceberg deletion vector magic number mismatch, expected: {}, actual: {}", + BigEndian::Load32(ICEBERG_DV_MAGIC), BigEndian::Load32(buf + sizeof(total_length))); + } + + roaring::Roaring64Map bitmap; + try { + bitmap = roaring::Roaring64Map::readSafe(bitmap_buf, bitmap_size); + } catch (const std::runtime_error& e) { + return Status::DataQualityError("Decode roaring bitmap failed, {}", e.what()); + } + + delete_rows->reserve(bitmap.cardinality()); + for (auto it = bitmap.begin(); it != bitmap.end(); it++) { + delete_rows->push_back(cast_set(*it)); + } + return Status::OK(); +} + +} // namespace + +std::shared_ptr create_system_properties( + const TFileScanRangeParams* scan_params) { + auto system_properties = std::make_shared(); + if (scan_params == nullptr || !scan_params->__isset.file_type) { + system_properties->system_type = TFileType::FILE_LOCAL; + return system_properties; + } + system_properties->system_type = scan_params->file_type; + system_properties->properties = scan_params->properties; + system_properties->hdfs_params = scan_params->hdfs_params; + if (scan_params->__isset.broker_addresses) { + system_properties->broker_addresses.assign(scan_params->broker_addresses.begin(), + scan_params->broker_addresses.end()); + } + return system_properties; +} + +std::string TableReader::debug_string() const { + std::ostringstream out; + out << "TableReader{format=" << file_format_to_string(_format) + << ", push_down_agg_type=" << push_down_agg_to_string(_push_down_agg_type) + << ", aggregate_pushdown_tried=" << _aggregate_pushdown_tried + << ", has_current_reader=" << (_data_reader.reader != nullptr) + << ", has_current_task=" << (_current_task != nullptr) + << ", current_file=" << current_file_debug_string(_current_task) + << ", has_delete_rows=" << (_delete_rows != nullptr) + << ", delete_row_count=" << (_delete_rows == nullptr ? 0 : _delete_rows->size()) + << ", has_system_properties=" << (_system_properties != nullptr) << ", system_type=" + << (_system_properties == nullptr ? static_cast(TFileType::FILE_LOCAL) + : static_cast(_system_properties->system_type)) + << ", has_scan_params=" << (_scan_params != nullptr) + << ", has_io_ctx=" << (_io_ctx != nullptr) + << ", has_runtime_state=" << (_runtime_state != nullptr) + << ", has_scanner_profile=" << (_scanner_profile != nullptr) + << ", mapper_options=" << _mapper_options.debug_string() << ", projected_columns=" + << join_table_reader_debug_strings( + _projected_columns, + [](const ColumnDefinition& column) { return column.debug_string(); }) + << ", partition_values=" << partition_values_debug_string(_partition_values) + << ", table_filters=" + << join_table_reader_debug_strings( + _table_filters, + [](const TableFilter& filter) { return table_filter_debug_string(filter); }) + << ", table_column_predicates=" + << table_column_predicates_debug_string(_table_column_predicates) + << ", conjunct_count=" << _conjuncts.size() << ", conjuncts=" + << join_table_reader_debug_strings(_conjuncts, + [](const VExprContextSPtr& conjunct) { + return expr_context_debug_string(conjunct); + }) + << ", file_schema=" + << join_table_reader_debug_strings( + _data_reader.file_schema, + [](const ColumnDefinition& field) { return field.debug_string(); }) + << ", file_block_layout=" + << join_table_reader_debug_strings( + _data_reader.file_block_layout, + [](const FileBlockColumn& column) { + std::ostringstream column_out; + column_out << "FileBlockColumn{file_column_id=" << column.file_column_id + << ", name=" << column.name << ", type=" + << (column.type == nullptr ? "null" : column.type->get_name()) + << "}"; + return column_out.str(); + }) + << ", block_template_columns=" << _data_reader.block_template.columns() + << ", column_mapper=" + << (_data_reader.column_mapper == nullptr ? "null" + : _data_reader.column_mapper->debug_string()) + << "}"; + return out.str(); +} + +Status TableReader::annotate_projected_column(const TFileScanSlotInfo& slot_info, + ProjectedColumnBuildContext* context, + ColumnDefinition* column) const { + (void)slot_info; + DORIS_CHECK(context != nullptr); + DORIS_CHECK(column != nullptr); + context->schema_column.reset(); + const auto* schema_field = find_external_root_field(context->scan_params, *column); + if (schema_field == nullptr) { + return Status::OK(); + } + context->schema_column = build_schema_column_from_external_field(*schema_field, column->type); + column->identifier = context->schema_column->identifier; + column->name_mapping = context->schema_column->name_mapping; + return Status::OK(); +} + +Status TableReader::init(TableReadOptions&& options) { + _scan_params = options.scan_params; + _format = options.format; + _io_ctx = options.io_ctx; + _runtime_state = options.runtime_state; + _scanner_profile = options.scanner_profile; + _file_slot_descs = options.file_slot_descs; + _push_down_agg_type = options.push_down_agg_type; + _condition_cache_digest = options.condition_cache_digest; + _projected_columns = std::move(options.projected_columns); + _system_properties = create_system_properties(_scan_params); + _mapper_options.mode = TableColumnMappingMode::BY_NAME; + _conjuncts = std::move(options.conjuncts); + _table_column_predicates = std::move(options.column_predicates); + + if (_scanner_profile != nullptr) { + static const char* table_profile = "TableReader"; + ADD_TIMER_WITH_LEVEL(_scanner_profile, table_profile, 1); + _profile.num_delete_files = ADD_CHILD_COUNTER_WITH_LEVEL(_scanner_profile, "NumDeleteFiles", + TUnit::UNIT, table_profile, 1); + _profile.num_delete_rows = ADD_CHILD_COUNTER_WITH_LEVEL(_scanner_profile, "NumDeleteRows", + TUnit::UNIT, table_profile, 1); + _profile.parse_delete_file_time = ADD_CHILD_TIMER_WITH_LEVEL( + _scanner_profile, "ParseDeleteFileTime", table_profile, 1); + _profile.exec_timer = + ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "GetBlockTime", table_profile, 1); + _profile.prepare_split_timer = + ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "PrepareSplitTime", table_profile, 1); + _profile.finalize_timer = + ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "FinalizeBlockTime", table_profile, 1); + _profile.create_reader_timer = + ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "CreateReaderTime", table_profile, 1); + _profile.pushdown_agg_timer = + ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "PushDownAggTime", table_profile, 1); + _profile.open_reader_timer = + ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, "OpenReaderTime", table_profile, 1); + } + return Status::OK(); +} + +Status TableReader::_build_table_filters_from_conjuncts() { + _table_filters.clear(); + for (const auto& conjunct : _conjuncts) { + RETURN_IF_ERROR( + build_table_filters_from_conjunct(conjunct, _runtime_state, &_table_filters)); + } + return Status::OK(); +} + +Status TableReader::_open_local_filter_exprs(const FileScanRequest& file_request) { + RowDescriptor row_desc; + for (const auto& conjunct : file_request.conjuncts) { + RETURN_IF_ERROR(conjunct->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(conjunct->open(_runtime_state)); + } + for (const auto& delete_conjunct : file_request.delete_conjuncts) { + RETURN_IF_ERROR(delete_conjunct->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(delete_conjunct->open(_runtime_state)); + } + return Status::OK(); +} + +bool TableReader::_should_enable_condition_cache(const FileScanRequest& file_request) const { + if (_condition_cache_digest == 0 || _push_down_agg_type == TPushAggOp::type::COUNT || + _current_file_description == std::nullopt || _data_reader.reader == nullptr) { + return false; + } + // Condition cache is populated by file readers after evaluating file-local row-level + // conjuncts. ColumnPredicate-only scans can prune row groups/pages, but they do not produce a + // per-row survivor bitmap that can safely populate the cache. + if (file_request.conjuncts.empty()) { + return false; + } + // Delete files/deletion vectors are table-format state. They may change independently of the + // data file path/mtime/size used by the external cache key, so caching their result can become + // stale. Keep delete filtering enabled, but do not read or write condition cache. + if (_delete_rows != nullptr || !file_request.delete_conjuncts.empty()) { + return false; + } + // Runtime filters can arrive late and their payload is not guaranteed to be represented by the + // scan-local digest. Without a read-only mode, a MISS could insert a bitmap for P AND RF under + // the digest for only P. This mirrors the old FileScanner guard. + return !contains_runtime_filter(file_request.conjuncts); +} + +Status TableReader::_init_reader_condition_cache(const FileScanRequest& file_request) { + _condition_cache = nullptr; + _condition_cache_ctx = nullptr; + if (!_should_enable_condition_cache(file_request)) { + return Status::OK(); + } + + auto* cache = segment_v2::ConditionCache::instance(); + if (cache == nullptr) { + return Status::OK(); + } + const auto& file = *_current_file_description; + _condition_cache_key = segment_v2::ConditionCache::ExternalCacheKey( + file.path, file.mtime, file.file_size, _condition_cache_digest, file.range_start_offset, + file.range_size); + + segment_v2::ConditionCacheHandle handle; + const bool condition_cache_hit = cache->lookup(_condition_cache_key, &handle); + if (condition_cache_hit) { + _condition_cache = handle.get_filter_result(); + ++_condition_cache_hit_count; + } else { + const int64_t total_rows = _data_reader.reader->get_total_rows(); + if (total_rows <= 0) { + return Status::OK(); + } + // Add one guard granule for split ranges that start in the middle of a granule. A guard + // false bit beyond the real range never overlaps real rows, but avoids boundary overflow + // when a reader marks the last partial granule. + const size_t num_granules = (total_rows + ConditionCacheContext::GRANULE_SIZE - 1) / + ConditionCacheContext::GRANULE_SIZE; + _condition_cache = std::make_shared>(num_granules + 1, false); + } + + if (_condition_cache != nullptr) { + _condition_cache_ctx = std::make_shared(); + _condition_cache_ctx->is_hit = condition_cache_hit; + _condition_cache_ctx->filter_result = _condition_cache; + _data_reader.reader->set_condition_cache_context(_condition_cache_ctx); + } + return Status::OK(); +} + +void TableReader::_finalize_reader_condition_cache() { + if (_condition_cache_ctx == nullptr || _condition_cache_ctx->is_hit) { + _condition_cache = nullptr; + _condition_cache_ctx = nullptr; + return; + } + // LIMIT or scanner cancellation may close a reader before all selected row ranges are visited. + // Unvisited granules remain false in a MISS bitmap, so inserting a partial bitmap would make a + // later HIT skip valid rows. Only publish cache entries after the physical reader reaches EOF. + if (!_current_reader_reached_eof) { + _condition_cache = nullptr; + _condition_cache_ctx = nullptr; + return; + } + segment_v2::ConditionCache::instance()->insert(_condition_cache_key, + std::move(_condition_cache)); + _condition_cache = nullptr; + _condition_cache_ctx = nullptr; +} + +Status TableReader::create_next_reader(bool* eos) { + SCOPED_TIMER(_profile.create_reader_timer); + DCHECK(_data_reader.reader == nullptr); + if (_current_task == nullptr) { + *eos = true; + return Status::OK(); + } + + RETURN_IF_ERROR(create_file_reader(&_data_reader.reader)); + DORIS_CHECK(_data_reader.reader != nullptr); + if (_batch_size > 0) { + _data_reader.reader->set_batch_size(_batch_size); + } + RETURN_IF_ERROR(_data_reader.reader->init(_runtime_state)); + RETURN_IF_ERROR(open_reader()); + if (_data_reader.reader == nullptr) { + *eos = _current_task == nullptr; + return Status::OK(); + } + *eos = false; + return Status::OK(); +} + +Status TableReader::create_file_reader(std::unique_ptr* reader) { + DORIS_CHECK(reader != nullptr); + if (_format == FileFormat::PARQUET) { + const bool enable_mapping_timestamp_tz = + _scan_params != nullptr && _scan_params->__isset.enable_mapping_timestamp_tz && + _scan_params->enable_mapping_timestamp_tz; + *reader = std::make_unique( + _system_properties, _current_task->data_file, _io_ctx, _scanner_profile, + _global_rowid_context, enable_mapping_timestamp_tz); + return Status::OK(); + } + if (_format == FileFormat::CSV) { + if (_file_slot_descs == nullptr) { + return Status::InvalidArgument("CSV reader requires file slot descriptors"); + } + // CSV has no embedded schema. TableReader owns table-level mapping, while CsvReader needs + // only the physical file slots plus scan text parameters to build a file-local schema. + // Non-file columns such as partitions/defaults/virtual row ids are intentionally excluded + // from `_file_slot_descs` and are materialized during finalize_chunk(). + *reader = std::make_unique( + _system_properties, _current_task->data_file, _io_ctx, _scanner_profile, + _scan_params, *_file_slot_descs, _current_range_compress_type, + _current_range_load_id); + return Status::OK(); + } + if (_format == FileFormat::TEXT) { + if (_file_slot_descs == nullptr) { + return Status::InvalidArgument("Text reader requires file slot descriptors"); + } + // Text files have no embedded schema. As with CSV, TableReader handles table-level mapping + // and only passes physical file slots to the v2 TextReader. + *reader = std::make_unique( + _system_properties, _current_task->data_file, _io_ctx, _scanner_profile, + _scan_params, *_file_slot_descs, _current_range_compress_type, + _current_range_load_id); + return Status::OK(); + } + if (_format == FileFormat::JSON) { + if (_file_slot_descs == nullptr) { + return Status::InvalidArgument("JSON reader requires file slot descriptors"); + } + *reader = std::make_unique( + _system_properties, _current_task->data_file, _io_ctx, _scanner_profile, + _scan_params, _current_file_range_desc, *_file_slot_descs, + _current_range_compress_type, _current_range_load_id); + return Status::OK(); + } + if (_format == FileFormat::NATIVE) { + *reader = std::make_unique( + _system_properties, _current_task->data_file, _io_ctx, _scanner_profile); + return Status::OK(); + } + return Status::NotSupported("TableReader does not support file format {}", + file_format_to_string(_format)); +} + +std::unique_ptr create_file_description(const TFileRangeDesc& range) { + auto file_description = std::make_unique(); + file_description->path = range.path; + file_description->file_size = range.__isset.file_size ? range.file_size : -1; + file_description->mtime = range.__isset.modification_time ? range.modification_time : 0; + file_description->range_start_offset = range.__isset.start_offset ? range.start_offset : 0; + file_description->range_size = range.__isset.size ? range.size : -1; + if (range.__isset.fs_name) { + file_description->fs_name = range.fs_name; + } + if (range.__isset.file_cache_admission) { + file_description->file_cache_admission = range.file_cache_admission; + } + return file_description; +} + +Status TableReader::prepare_split(const SplitReadOptions& options) { + SCOPED_TIMER(_profile.prepare_split_timer); + // Update to current split format to handle ORC/PARQUET files in one table. + _format = options.current_split_format; + _partition_values = std::move(options.partition_values); + _current_task = std::make_unique(); + _current_task->data_file = create_file_description(options.current_range); + _current_file_description = *_current_task->data_file; + _current_file_range_desc = options.current_range; + _current_range_compress_type = options.current_range.__isset.compress_type + ? options.current_range.compress_type + : TFileCompressType::UNKNOWN; + _current_range_load_id = options.current_range.__isset.load_id + ? std::make_optional(options.current_range.load_id) + : std::nullopt; + _global_rowid_context = options.global_rowid_context; + _delete_rows = nullptr; + _aggregate_pushdown_tried = false; + _remaining_table_level_count = -1; + _current_reader_reached_eof = false; + if (_push_down_agg_type == TPushAggOp::type::COUNT && + options.current_range.__isset.table_format_params && + options.current_range.table_format_params.__isset.table_level_row_count) { + DORIS_CHECK(options.current_range.table_format_params.table_level_row_count >= -1); + _remaining_table_level_count = + options.current_range.table_format_params.table_level_row_count; + } + if (_is_table_level_count_active()) { + return Status::OK(); + } + return _parse_delete_predicates(options); +} + +Status TableReader::_parse_delete_predicates(const SplitReadOptions& options) { + DeleteFileDesc desc {.fs_name = options.current_range.fs_name}; + bool has_delete_file = false; + RETURN_IF_ERROR(_parse_deletion_vector_file(options.current_range.table_format_params, &desc, + &has_delete_file)); + if (has_delete_file) { + DORIS_CHECK(options.cache != nullptr); + Status create_status = Status::OK(); + + _delete_rows = options.cache->get(desc.key, [&]() -> DeleteRows* { + auto* delete_rows = new DeleteRows; + + DeletionVectorReader dv_reader(_runtime_state, _scanner_profile, *_scan_params, desc, + _io_ctx.get()); + create_status = dv_reader.open(); + if (!create_status.ok()) [[unlikely]] { + return nullptr; + } + + size_t bytes_read = desc.size; + std::vector buffer(bytes_read); + create_status = dv_reader.read_at(desc.start_offset, {buffer.data(), bytes_read}); + if (!create_status.ok()) [[unlikely]] { + return nullptr; + } + + const char* buf = buffer.data(); + SCOPED_TIMER(_profile.parse_delete_file_time); + create_status = parse_deletion_vector(buf, bytes_read, desc.format, delete_rows); + if (!create_status.ok()) [[unlikely]] { + return nullptr; + } + COUNTER_UPDATE(_profile.num_delete_rows, delete_rows->size()); + return delete_rows; + }); + RETURN_IF_ERROR(create_status); + } + + return Status::OK(); +} +} // namespace doris::format diff --git a/be/src/format_v2/table_reader.h b/be/src/format_v2/table_reader.h new file mode 100644 index 00000000000000..a557b22795ce0d --- /dev/null +++ b/be/src/format_v2/table_reader.h @@ -0,0 +1,1565 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/cast_set.h" +#include "common/exception.h" +#include "common/logging.h" +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_array.h" +#include "core/column/column_const.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "core/field.h" +#include "exec/common/stringop_substring.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "exprs/vexpr_fwd.h" +#include "exprs/vslot_ref.h" +#include "format_v2/column_data.h" +#include "format_v2/column_mapper.h" +#include "format_v2/expr/cast.h" +#include "format_v2/expr/delete_predicate.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/schema_projection.h" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/descriptors.h" +#include "storage/segment/condition_cache.h" + +namespace doris { +class Block; +class ColumnPredicate; +struct DeleteFileDesc; +class RuntimeState; +} // namespace doris + +namespace doris::format { + +using DeleteRows = std::vector; + +// Row-level predicates on table/global schema. They are rewritten to file-local expressions when +// possible, and remain the source of row-level filtering after localization. +struct TableFilter { + VExprContextSPtr conjunct; + std::vector global_indices; +}; + +struct ScanTask { + virtual ~ScanTask() = default; + + std::unique_ptr data_file; +}; + +struct ProjectedColumnBuildContext { + const TFileScanRangeParams* scan_params = nullptr; + const TFileRangeDesc* range = nullptr; + RuntimeState* runtime_state = nullptr; + std::optional schema_column = std::nullopt; + size_t next_file_column_idx = 0; +}; + +struct ReadProfile { + RuntimeProfile::Counter* num_delete_files = nullptr; + RuntimeProfile::Counter* num_delete_rows = nullptr; + RuntimeProfile::Counter* parse_delete_file_time = nullptr; + RuntimeProfile::Counter* exec_timer = nullptr; + RuntimeProfile::Counter* prepare_split_timer = nullptr; + RuntimeProfile::Counter* finalize_timer = nullptr; + RuntimeProfile::Counter* create_reader_timer = nullptr; + RuntimeProfile::Counter* pushdown_agg_timer = nullptr; + RuntimeProfile::Counter* open_reader_timer = nullptr; +}; + +struct TableReadOptions { + // Columns need to be read from file and output by table reader. They are all in table/global + // schema semantics. + const std::vector projected_columns; + // Simple predicates for a single column, which is parsed on scan operator. + const TableColumnPredicates column_predicates; + // All complex conjuncts from scan operator + const VExprContextSPtrs conjuncts; + // File format of the underlying data files, needed for reader initialization and reader-level + // filter pushdown. + const FileFormat format; + TFileScanRangeParams* scan_params; + std::shared_ptr io_ctx; + RuntimeState* runtime_state; + RuntimeProfile* scanner_profile; + // File formats without complete self-describing metadata, such as CSV, Text, and JSON, need + // the FE-planned physical file slots to build their file-local schema and deserialize values. + const std::vector* file_slot_descs = nullptr; + // Push-down aggregate type. + const TPushAggOp::type push_down_agg_type = TPushAggOp::type::NONE; + // Digest of stable pushed-down predicates. A zero digest disables condition cache. + uint64_t condition_cache_digest = 0; +}; + +struct SplitReadOptions { + // Split-level information for reader initialization, which may include file path, partition values, delete file info, etc. The content is table format specific and opaque to table reader base class; it's the responsibility of the concrete table reader implementation to parse necessary information for reader initialization and filter pushdown. + std::map partition_values; + ShardedKVCache* cache; + TFileRangeDesc current_range; + FileFormat current_split_format = FileFormat::PARQUET; + std::optional global_rowid_context; +}; + +// Base class for table-level readers. +// This layer owns common table-level orchestration, such as split iteration, dynamic partition +// pruning, delete handling and conversion from file-local blocks to table-schema blocks. Concrete +// table-format readers only need to provide format-specific hooks for opening readers and parsing +// split metadata. +class TableReader { +public: + virtual ~TableReader() = default; + + // Initialize common runtime options for the table reader. Subclasses may call this from their + // own init(options); table-format schema and split metadata are provided later per split. + virtual Status init(TableReadOptions&& options); + + // FileScannerV2 adjusts this before each get_block() using an adaptive bytes-per-row estimate. + // Store it here as well as forwarding to the current reader so newly opened split readers start + // with the latest predicted batch size. + void set_batch_size(size_t batch_size) { + _batch_size = std::max(1, batch_size); + if (_data_reader.reader != nullptr) { + _data_reader.reader->set_batch_size(_batch_size); + } + } + + // Prepare for reading a new split/task. + // 1. Pass a new split/task to reader, which will be used in subsequent open_reader() to initialize the underlying file reader. + // 2. Parse delete predicates from split/task information, which will be used for later dynamic filtering and delete handling. + virtual Status prepare_split(const SplitReadOptions& options); + + // Public entry point for reading a table-schema block. The base class opens the current reader, + // advances across EOF, and closes exhausted readers. Subclasses provide protected hooks for + // table-format-specific behavior. + virtual Status get_block(Block* block, bool* eos) { + SCOPED_TIMER(_profile.exec_timer); + DORIS_CHECK(block->columns() == _projected_columns.size()); + block->clear_column_data(_projected_columns.size()); + + while (true) { + if (*eos) { + return Status::OK(); + } + if (!_data_reader.reader) { + if (_is_table_level_count_active()) { + RETURN_IF_ERROR(_read_table_level_count(block, eos)); + return Status::OK(); + } + RETURN_IF_ERROR(create_next_reader(eos)); + if (!_data_reader.reader) { + DCHECK(*eos); + return Status::OK(); + } + } + + // Materialize a reduced row set for upper aggregate operators when aggregate + // pushdown can be applied. This is not the final aggregate result: COUNT emits + // `count` default rows for the upper COUNT(*), and MIN/MAX emits two rows containing + // file-level min/max values for the upper MIN/MAX. + if (!_aggregate_pushdown_tried) { + SCOPED_TIMER(_profile.pushdown_agg_timer); + bool pushed_down = false; + RETURN_IF_ERROR(_try_materialize_aggregate_pushdown_rows(block, &pushed_down)); + if (pushed_down) { + return Status::OK(); + } + } + + bool current_eof = false; + _data_reader.block_template.clear_column_data( + cast_set(_data_reader.file_block_layout.size())); + size_t current_rows = 0; + RETURN_IF_ERROR(_data_reader.reader->get_block(&_data_reader.block_template, + ¤t_rows, ¤t_eof)); + if (current_rows == 0) { + if (current_eof) { + _current_reader_reached_eof = true; + RETURN_IF_ERROR(close_current_reader()); + } + continue; + } + DCHECK_EQ(_data_reader.block_template.columns(), _data_reader.file_block_layout.size()) + << _data_reader.block_template.dump_structure(); +#ifndef NDEBUG + RETURN_IF_ERROR(_check_file_block_columns("after file reader get_block", current_rows)); +#endif + DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size()); + RETURN_IF_ERROR(finalize_chunk(block, current_rows)); +#ifndef NDEBUG + RETURN_IF_ERROR( + _check_table_block_columns("after finalize_chunk", block, current_rows)); +#endif + if (current_eof) { + _current_reader_reached_eof = true; + RETURN_IF_ERROR(close_current_reader()); + } + return Status::OK(); + } + } + + // Close the table reader and the currently active file reader. Subclasses that hold additional + // table-format resources should override this and call TableReader::close() first. + virtual Status close() { + if (_data_reader.reader) { + RETURN_IF_ERROR(close_current_reader()); + } + _current_task.reset(); + _current_file_description.reset(); + _remaining_table_level_count = -1; + return Status::OK(); + } + + int64_t condition_cache_hit_count() const { return _condition_cache_hit_count; } + + virtual std::string debug_string() const; + + virtual Status annotate_projected_column(const TFileScanSlotInfo& slot_info, + ProjectedColumnBuildContext* context, + ColumnDefinition* column) const; + + virtual Status validate_projected_columns(const ProjectedColumnBuildContext& context) const { + (void)context; + return Status::OK(); + } + +protected: + // Parse deletion vector information from table format specific file description. + virtual Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, + DeleteFileDesc* desc, bool* has_delete_file) { + *has_delete_file = false; + return Status::OK(); + } + + // Advance to the next reader. This closes the current reader first and then opens the next + // concrete reader. Subclasses should not duplicate this loop. + Status create_next_reader(bool* eos); + virtual Status create_file_reader(std::unique_ptr* reader); + virtual TableColumnMappingMode mapping_mode() const { return TableColumnMappingMode::BY_NAME; } + virtual Status annotate_file_schema(std::vector* file_schema) { + DORIS_CHECK(file_schema != nullptr); + return Status::OK(); + } + + // Open the concrete reader for the current split/task and build the file-local scan request. + virtual Status open_reader() { + SCOPED_TIMER(_profile.open_reader_timer); + // 1. Get file schema and create column mapping. + std::vector file_schema; + RETURN_IF_ERROR(_data_reader.reader->get_schema(&file_schema)); + // For Paimon/Hudi, FE can provide field ids through `history_schema_info`. Annotate the + // file schema before column mapping when the table format maps columns by field id. + RETURN_IF_ERROR(annotate_file_schema(&file_schema)); + _data_reader.file_schema = file_schema; + _mapper_options.mode = mapping_mode(); + + _data_reader.column_mapper = _data_reader.reader->create_column_mapper(_mapper_options); + DORIS_CHECK(_data_reader.column_mapper != nullptr); + RETURN_IF_ERROR(_data_reader.column_mapper->create_mapping(_projected_columns, + _partition_values, file_schema)); + DORIS_CHECK(_data_reader.column_mapper->mappings().size() == _projected_columns.size()); + + // 2. Build table filters based on conjuncts and column predicates. + RETURN_IF_ERROR(_build_table_filters_from_conjuncts()); + + // 3. Create file scan request based on column mapping and table filters, then open file + // reader with the request. File scan request carries row-level expression filters and + // file-level pruning hints. Only expression filters decide returned rows; column predicates + // are pruning hints. + auto file_request = std::make_shared(); + RETURN_IF_ERROR(_data_reader.column_mapper->create_scan_request( + _table_filters, _table_column_predicates, _projected_columns, file_request.get(), + _runtime_state)); + bool constant_filter_pruned_split = false; + RETURN_IF_ERROR(_evaluate_constant_filters(&constant_filter_pruned_split)); + if (constant_filter_pruned_split) { + RETURN_IF_ERROR(close_current_reader()); + return Status::OK(); + } + RETURN_IF_ERROR(customize_file_scan_request(file_request.get())); + RETURN_IF_ERROR(_open_local_filter_exprs(*file_request)); + _data_reader.file_block_layout.clear(); + _data_reader.block_template.clear(); + _data_reader.file_block_layout.resize(file_request->local_positions.size()); + + // 4. Build file block layout from file schema and column mapping. The layout describes + // the block returned by file reader before table-column materialization. + for (const auto& [file_column_id, block_position] : file_request->local_positions) { + DORIS_CHECK(block_position.value() < _data_reader.file_block_layout.size()); + const auto* field = _find_column_definition(_data_reader.file_schema, file_column_id); + DORIS_CHECK(field != nullptr); + + ColumnDefinition projected_field; + { + auto it = std::find_if( + file_request->non_predicate_columns.begin(), + file_request->non_predicate_columns.end(), + [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; }); + if (it != file_request->non_predicate_columns.end()) { + RETURN_IF_ERROR(project_column_definition(*field, *it, &projected_field)); + } + } + { + auto it = std::find_if( + file_request->predicate_columns.begin(), + file_request->predicate_columns.end(), + [&](const LocalColumnIndex& p) { return p.column_id() == file_column_id; }); + if (it != file_request->predicate_columns.end()) { + RETURN_IF_ERROR(project_column_definition(*field, *it, &projected_field)); + } + } + _data_reader.file_block_layout[block_position.value()] = { + .file_column_id = file_column_id, + .name = projected_field.name, + .type = projected_field.type, + }; + DORIS_CHECK(_data_reader.file_block_layout[block_position.value()].type != nullptr); + } + + // 5. Prepare block template from file block layout. The block template stores the block + // returned by file reader before table-column materialization. + _data_reader.block_template.reserve(_data_reader.file_block_layout.size()); + for (const auto& column : _data_reader.file_block_layout) { + _data_reader.block_template.insert( + {column.type->create_column(), column.type, column.name}); + } + if (VLOG_DEBUG_IS_ON) { + VLOG_DEBUG << "TableReader debug: " << debug_string(); + } + RETURN_IF_ERROR(_open_mapping_exprs()); + RETURN_IF_ERROR(_data_reader.reader->open(file_request)); + RETURN_IF_ERROR(_init_reader_condition_cache(*file_request)); + return Status::OK(); + } + + Status _build_table_filters_from_conjuncts(); + Status _open_local_filter_exprs(const FileScanRequest& file_request); + Status _init_reader_condition_cache(const FileScanRequest& file_request); + void _finalize_reader_condition_cache(); + bool _should_enable_condition_cache(const FileScanRequest& file_request) const; + + Status _evaluate_constant_filters(bool* can_filter_all) { + DORIS_CHECK(can_filter_all != nullptr); + *can_filter_all = false; + for (const auto& table_filter : _table_filters) { + if (table_filter.conjunct == nullptr || + // RuntimeFilterExpr does not implement execute_column_impl(); it is evaluated by + // the row-level filter path through execute_filter(). Constant split pruning uses + // VExprContext::execute() on a one-row synthetic block, so runtime filters must not + // be pre-executed here even when their referenced slot maps to a constant value. + table_filter.conjunct->root()->is_rf_wrapper() || + !_table_filter_has_only_constant_entries(table_filter)) { + continue; + } + Block eval_block; + RETURN_IF_ERROR(_build_constant_filter_block(table_filter, &eval_block)); + RowDescriptor row_desc; + RETURN_IF_ERROR(table_filter.conjunct->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(table_filter.conjunct->open(_runtime_state)); + int result_column_id = -1; + RETURN_IF_ERROR(table_filter.conjunct->execute(&eval_block, &result_column_id)); + DORIS_CHECK(result_column_id >= 0); + if (_filter_result_filters_all(eval_block.get_by_position(result_column_id).column)) { + *can_filter_all = true; + return Status::OK(); + } + } + return Status::OK(); + } + + bool _table_filter_has_only_constant_entries(const TableFilter& table_filter) const { + const auto& filter_entries = _data_reader.column_mapper->filter_entries(); + for (const auto global_index : table_filter.global_indices) { + const auto entry_it = filter_entries.find(global_index); + if (entry_it == filter_entries.end() || !entry_it->second.is_constant()) { + return false; + } + } + return !table_filter.global_indices.empty(); + } + + Status _build_constant_filter_block(const TableFilter& table_filter, Block* eval_block) { + DORIS_CHECK(eval_block != nullptr); + eval_block->clear(); + const auto& mappings = _data_reader.column_mapper->mappings(); + const auto& filter_entries = _data_reader.column_mapper->filter_entries(); + DORIS_CHECK(mappings.size() == _projected_columns.size()); + for (size_t column_idx = 0; column_idx < mappings.size(); ++column_idx) { + const auto global_index = GlobalIndex(column_idx); + const auto& mapping = mappings[column_idx]; + const auto entry_it = filter_entries.find(global_index); + const bool referenced_by_filter = + std::find(table_filter.global_indices.begin(), + table_filter.global_indices.end(), + global_index) != table_filter.global_indices.end(); + if (referenced_by_filter && entry_it != filter_entries.end() && + entry_it->second.is_constant()) { + ColumnPtr constant_column; + RETURN_IF_ERROR(_materialize_constant_filter_column( + entry_it->second.constant_index(), &constant_column)); + eval_block->insert({std::move(constant_column), mapping.table_type, + mapping.table_column_name}); + } else { + eval_block->insert({mapping.table_type->create_column_const_with_default_value(1), + mapping.table_type, mapping.table_column_name}); + } + } + return Status::OK(); + } + + Status _materialize_constant_filter_column(ConstantIndex constant_index, ColumnPtr* column) { + DORIS_CHECK(column != nullptr); + const auto& constant_entry = _data_reader.column_mapper->constant_map().get(constant_index); + DORIS_CHECK(constant_entry.expr != nullptr); + DORIS_CHECK(constant_entry.type != nullptr); + RowDescriptor row_desc; + RETURN_IF_ERROR(constant_entry.expr->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(constant_entry.expr->open(_runtime_state)); + Block eval_block; + eval_block.insert({constant_entry.type->create_column_const_with_default_value(1), + constant_entry.type, "__table_reader_constant_filter"}); + int result_column_id = -1; + RETURN_IF_ERROR(constant_entry.expr->execute(&eval_block, &result_column_id)); + DORIS_CHECK(result_column_id >= 0); + *column = eval_block.get_by_position(result_column_id).column; + DORIS_CHECK((*column)->size() == 1); + return Status::OK(); + } + + static bool _filter_result_filters_all(const ColumnPtr& filter_column) { + DORIS_CHECK(filter_column.get() != nullptr); + DORIS_CHECK(filter_column->size() == 1); + return !filter_column->get_bool(0); + } + + virtual Status customize_file_scan_request(FileScanRequest* file_request) { + return _append_delete_predicate(file_request); + } + + bool _is_table_level_count_active() const { return _remaining_table_level_count >= 0; } + + Status _materialize_count_rows(size_t rows, Block* block) const { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(block->columns() > 0 || rows == 0); + for (size_t column_idx = 0; column_idx < block->columns(); ++column_idx) { + auto column = block->get_by_position(column_idx).type->create_column(); + column->resize(rows); + block->replace_by_position(column_idx, std::move(column)); + } + return Status::OK(); + } + + Status _read_table_level_count(Block* block, bool* eos) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(eos != nullptr); + DORIS_CHECK(_push_down_agg_type == TPushAggOp::type::COUNT); + DORIS_CHECK(_remaining_table_level_count >= 0); + if (_remaining_table_level_count == 0) { + _remaining_table_level_count = -1; + _current_task.reset(); + *eos = true; + return Status::OK(); + } + + const int64_t batch_size = _runtime_state == nullptr + ? _remaining_table_level_count + : static_cast(_runtime_state->batch_size()); + const auto rows = std::min(_remaining_table_level_count, batch_size); + RETURN_IF_ERROR(_materialize_count_rows(cast_set(rows), block)); + _remaining_table_level_count -= rows; + *eos = false; + return Status::OK(); + } + + void _append_file_scan_column(FileScanRequest* request, LocalColumnId column_id, + std::vector* scan_columns) { + DORIS_CHECK(request != nullptr); + DORIS_CHECK(scan_columns != nullptr); + FileScanRequestBuilder builder(request); + Status status; + if (scan_columns == &request->predicate_columns) { + status = builder.add_predicate_column(column_id); + } else { + DORIS_CHECK(scan_columns == &request->non_predicate_columns); + status = builder.add_non_predicate_column(column_id); + } + DORIS_CHECK(status.ok()) << status.to_string(); + if (column_id == LocalColumnId(ROW_POSITION_COLUMN_ID) && + _find_column_definition(_data_reader.file_schema, column_id) == nullptr) { + _data_reader.file_schema.push_back(row_position_column_definition()); + } + } + + // Append DeletePredicate to file scan request if there are deletes. The predicate will be evaluated in file reader level and filter out deleted rows before returning data to table reader. + Status _append_delete_predicate(FileScanRequest* request) { + DORIS_CHECK(request != nullptr); + if (_delete_rows == nullptr || _delete_rows->empty()) { + return Status::OK(); + } + const auto row_position_column_id = LocalColumnId(ROW_POSITION_COLUMN_ID); + _append_file_scan_column(request, row_position_column_id, &request->predicate_columns); + + auto delete_predicate = std::make_shared(*_delete_rows); + const auto block_position = request->local_positions.at(row_position_column_id); + delete_predicate->add_child(VSlotRef::create_shared( + cast_set(block_position.value()), cast_set(block_position.value()), -1, + std::make_shared(), ROW_POSITION_COLUMN_NAME)); + + request->delete_conjuncts.push_back( + VExprContext::create_shared(std::move(delete_predicate))); + return Status::OK(); + } + + // Close the current concrete reader. This hook is called by both create_next_reader() and + // close(), so it should remain idempotent. + virtual Status close_current_reader() { + _finalize_reader_condition_cache(); + RETURN_IF_ERROR(_data_reader.reader->close()); + _data_reader.reader.reset(); + if (_data_reader.column_mapper != nullptr) { + _data_reader.column_mapper->clear(); + _data_reader.column_mapper.reset(); + } + _table_filters.clear(); + _data_reader.file_schema.clear(); + _data_reader.file_block_layout.clear(); + _data_reader.block_template.clear(); + _current_task.reset(); + _current_file_description.reset(); + _current_reader_reached_eof = false; + return Status::OK(); + } + + // Finalize file-local block to table/global schema block. + Status finalize_chunk(Block* block, const size_t rows) { + SCOPED_TIMER(_profile.finalize_timer); + size_t idx = 0; + for (const auto& mapping : _data_reader.column_mapper->mappings()) { + ColumnPtr column; + RETURN_IF_ERROR(_materialize_mapping_column(mapping, &_data_reader.block_template, rows, + &column)); + block->replace_by_position(idx, IColumn::mutate(std::move(column))); + idx++; + } + RETURN_IF_ERROR(materialize_virtual_columns(block)); + // Enforce CHAR/VARCHAR length declared by the table schema after all file-to-table + // materialization has finished. + RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block)); + return Status::OK(); + } + + // Materialize virtual columns in the table block, such as Iceberg _row_id and + // _last_updated_sequence_number. This runs after normal column materialization so finalize + // expressions can reference those virtual columns. + virtual Status materialize_virtual_columns(Block* table_block) { return Status::OK(); } + +#ifndef NDEBUG + Status _check_file_block_columns(std::string_view stage, size_t rows) { + DORIS_CHECK(_data_reader.block_template.columns() == _data_reader.file_block_layout.size()); + for (size_t idx = 0; idx < _data_reader.block_template.columns(); ++idx) { + const auto& file_block_column = _data_reader.file_block_layout[idx]; + const auto& column_with_type = _data_reader.block_template.get_by_position(idx); + const auto* column = column_with_type.column.get(); + try { + if (column == nullptr) { + auto st = Status::InternalError( + "Invalid file block column {} at {}: file_column_id={}, name='{}', " + "type={}, column=null, expected_rows={}, reader={}", + idx, stage, file_block_column.file_column_id.value(), + file_block_column.name, + file_block_column.type == nullptr ? "null" + : file_block_column.type->get_name(), + rows, debug_string()); + LOG(WARNING) << st; + return st; + } + column->sanity_check(); + auto st = column_with_type.check_type_and_column_match(); + if (!st.ok()) { + auto contextual_status = Status::InternalError( + "Invalid file block column {} at {}: file_column_id={}, name='{}', " + "type={}, column={}, column_size={}, expected_rows={}, error={}, " + "reader={}", + idx, stage, file_block_column.file_column_id.value(), + file_block_column.name, + file_block_column.type == nullptr ? "null" + : file_block_column.type->get_name(), + column->get_name(), column->size(), rows, st.to_string(), + debug_string()); + LOG(WARNING) << contextual_status; + return contextual_status; + } + } catch (const Exception& e) { + auto st = Status::InternalError( + "Invalid file block column {} at {}: file_column_id={}, name='{}', " + "type={}, column={}, column_size={}, expected_rows={}, error={}, " + "reader={}", + idx, stage, file_block_column.file_column_id.value(), + file_block_column.name, + file_block_column.type == nullptr ? "null" + : file_block_column.type->get_name(), + column == nullptr ? "null" : column->get_name(), + column == nullptr ? 0 : column->size(), rows, e.to_string(), + debug_string()); + LOG(WARNING) << st; + return st; + } catch (const std::exception& e) { + auto st = Status::InternalError( + "Invalid file block column {} at {}: file_column_id={}, name='{}', " + "type={}, column={}, column_size={}, expected_rows={}, error={}, " + "reader={}", + idx, stage, file_block_column.file_column_id.value(), + file_block_column.name, + file_block_column.type == nullptr ? "null" + : file_block_column.type->get_name(), + column == nullptr ? "null" : column->get_name(), + column == nullptr ? 0 : column->size(), rows, e.what(), debug_string()); + LOG(WARNING) << st; + return st; + } + } + return Status::OK(); + } + + Status _check_table_block_columns(std::string_view stage, const Block* block, size_t rows) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size()); + for (size_t idx = 0; idx < block->columns(); ++idx) { + const auto& mapping = _data_reader.column_mapper->mappings()[idx]; + const auto& column_with_type = block->get_by_position(idx); + const auto* column = column_with_type.column.get(); + try { + if (column == nullptr) { + auto st = Status::InternalError( + "Invalid table block column {} at {}: table_column='{}', " + "global_index={}, type={}, column=null, expected_rows={}, mapping={}", + idx, stage, mapping.table_column_name, mapping.global_index.value(), + mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(), + rows, mapping.debug_string()); + LOG(WARNING) << st; + return st; + } + column->sanity_check(); + auto st = column_with_type.check_type_and_column_match(); + if (!st.ok()) { + auto contextual_status = Status::InternalError( + "Invalid table block column {} at {}: table_column='{}', " + "global_index={}, type={}, column={}, column_size={}, " + "expected_rows={}, error={}, mapping={}", + idx, stage, mapping.table_column_name, mapping.global_index.value(), + mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(), + column->get_name(), column->size(), rows, st.to_string(), + mapping.debug_string()); + LOG(WARNING) << contextual_status; + return contextual_status; + } + } catch (const Exception& e) { + auto st = Status::InternalError( + "Invalid table block column {} at {}: table_column='{}', global_index={}, " + "type={}, column={}, column_size={}, expected_rows={}, error={}, " + "mapping={}", + idx, stage, mapping.table_column_name, mapping.global_index.value(), + mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(), + column == nullptr ? "null" : column->get_name(), + column == nullptr ? 0 : column->size(), rows, e.to_string(), + mapping.debug_string()); + LOG(WARNING) << st; + return st; + } catch (const std::exception& e) { + auto st = Status::InternalError( + "Invalid table block column {} at {}: table_column='{}', global_index={}, " + "type={}, column={}, column_size={}, expected_rows={}, error={}, " + "mapping={}", + idx, stage, mapping.table_column_name, mapping.global_index.value(), + mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(), + column == nullptr ? "null" : column->get_name(), + column == nullptr ? 0 : column->size(), rows, e.what(), + mapping.debug_string()); + LOG(WARNING) << st; + return st; + } + } + return Status::OK(); + } +#endif + + Status _truncate_char_or_varchar_columns(Block* block) { + DORIS_CHECK(block != nullptr); + if (_runtime_state == nullptr || + !_runtime_state->query_options().truncate_char_or_varchar_columns) { + return Status::OK(); + } + DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size()); + for (size_t idx = 0; idx < _data_reader.column_mapper->mappings().size(); ++idx) { + const auto& mapping = _data_reader.column_mapper->mappings()[idx]; + if (!_should_truncate_char_or_varchar_column(mapping)) { + continue; + } + const auto target_len = + assert_cast(remove_nullable(mapping.table_type).get()) + ->len(); + _truncate_char_or_varchar_column(block, idx, target_len); + } + return Status::OK(); + } + + // Return true when the table schema has a bounded CHAR/VARCHAR length that is stricter than + // the file-side type. Examples: + // - table VARCHAR(10), file VARCHAR(20): truncate to 10; + // - table VARCHAR(10), file STRING: truncate to 10 because STRING has no declared bound; + // - table STRING, any file type: no truncation because the target has no bound. + static bool _should_truncate_char_or_varchar_column(const ColumnMapping& mapping) { + if (mapping.table_type == nullptr) { + return false; + } + const auto table_type = remove_nullable(mapping.table_type); + const auto primitive_type = table_type->get_primitive_type(); + if (primitive_type != TYPE_VARCHAR && primitive_type != TYPE_CHAR) { + return false; + } + const auto target_len = assert_cast(table_type.get())->len(); + if (target_len <= 0) { + return false; + } + if (mapping.file_type == nullptr) { + return true; + } + const auto file_type = remove_nullable(mapping.file_type); + DORIS_CHECK(file_type != nullptr); + int file_len = -1; + if (file_type->get_primitive_type() == TYPE_VARCHAR || + file_type->get_primitive_type() == TYPE_CHAR || + file_type->get_primitive_type() == TYPE_STRING) { + file_len = assert_cast(file_type.get())->len(); + } + + return file_len < 0 || target_len < file_len; + } + + // Truncate a materialized CHAR/VARCHAR column in place by reusing the vectorized substring + // implementation: substring(column, 1, len). Nullable columns are unwrapped before substring + // execution and wrapped back with the original null map afterward, because substring operates + // on the nested string payload only. + static void _truncate_char_or_varchar_column(Block* block, size_t idx, int len) { + DORIS_CHECK(block != nullptr); + auto int_type = std::make_shared(); + const auto num_columns_without_result = cast_set(block->columns()); + auto& target = block->get_by_position(idx); + const bool is_nullable = target.type->is_nullable(); + ColumnPtr input_column = target.column; + ColumnPtr null_map_column; + if (is_nullable) { + const auto* nullable_column = assert_cast(target.column.get()); + input_column = nullable_column->get_nested_column_ptr(); + null_map_column = nullable_column->get_null_map_column_ptr(); + } + block->replace_by_position(idx, std::move(input_column)); + block->insert({int_type->create_column_const(block->rows(), to_field(1)), + int_type, "const 1"}); + block->insert({int_type->create_column_const(block->rows(), to_field(len)), + int_type, "const len"}); + block->insert({nullptr, std::make_shared(), "result"}); + + ColumnNumbers temp_arguments(3); + temp_arguments[0] = cast_set(idx); + temp_arguments[1] = num_columns_without_result; + temp_arguments[2] = num_columns_without_result + 1; + const uint32_t result_column_id = num_columns_without_result + 2; + SubstringUtil::substring_execute(*block, temp_arguments, result_column_id, block->rows()); + + ColumnPtr result_column = block->get_by_position(result_column_id).column; + if (is_nullable) { + result_column = ColumnNullable::create(std::move(result_column), null_map_column); + } + block->replace_by_position(idx, std::move(result_column)); + block->erase_tail(num_columns_without_result); + } + + Status _try_materialize_aggregate_pushdown_rows(Block* block, bool* pushed_down) { + DORIS_CHECK(block != nullptr); + DORIS_CHECK(pushed_down != nullptr); + *pushed_down = false; + block->clear_column_data(_projected_columns.size()); + _aggregate_pushdown_tried = true; + if (!_supports_aggregate_pushdown(_push_down_agg_type)) { + return Status::OK(); + } + + FileAggregateRequest file_request; + RETURN_IF_ERROR(_build_file_aggregate_request(_push_down_agg_type, &file_request)); + FileAggregateResult file_result; + const auto status = _data_reader.reader->get_aggregate_result(file_request, &file_result); + if (status.is()) { + return Status::OK(); + } + RETURN_IF_ERROR(status); + RETURN_IF_ERROR( + _materialize_aggregate_pushdown_rows(_push_down_agg_type, file_result, block)); + *pushed_down = true; + RETURN_IF_ERROR(close_current_reader()); + return Status::OK(); + } + + virtual bool _supports_aggregate_pushdown(TPushAggOp::type agg_type) const { + // Only COUNT and MIN/MAX can be push down. + if (agg_type != TPushAggOp::type::COUNT && agg_type != TPushAggOp::type::MINMAX) { + return false; + } + // Only support aggregate pushdown when there is no delete, filter and column predicate, so + // the reduced rows consumed by the upper aggregate remain semantically equivalent to a + // normal scan. + if (_delete_rows != nullptr && !_delete_rows->empty()) { + return false; + } + if (!_table_filters.empty() || !_table_column_predicates.empty()) { + return false; + } + if (agg_type == TPushAggOp::type::COUNT) { + return true; + } + // For MIN/MAX, only support direct file-to-table column mappings. The two emitted rows + // must be enough for the upper MIN/MAX aggregate without evaluating default expressions or + // virtual columns. + for (const auto& mapping : _data_reader.column_mapper->mappings()) { + if (!mapping.file_local_id.has_value() || + mapping.virtual_column_type != TableVirtualColumnType::INVALID || + mapping.default_expr != nullptr || mapping.file_type == nullptr || + mapping.table_type == nullptr) { + return false; + } + if (!_can_push_down_minmax_for_mapping(mapping)) { + return false; + } + } + return true; + } + + static ColumnPtr _detach_column(ColumnPtr column) { + DORIS_CHECK(column.get() != nullptr); + return IColumn::mutate(std::move(column)); + } + + static Status _align_column_nullability(ColumnPtr* column, const DataTypePtr& table_type) { + DORIS_CHECK(column != nullptr); + DORIS_CHECK(column->get() != nullptr); + DORIS_CHECK(table_type != nullptr); + // Must return non-const column + *column = (*column)->convert_to_full_column_if_const(); + if (table_type->is_nullable()) { + const auto& nested_type = + assert_cast(*table_type).get_nested_type(); + if (!(*column)->is_nullable()) { + RETURN_IF_ERROR(_align_column_nullability(column, nested_type)); + *column = make_nullable(*column); + return Status::OK(); + } + const auto& nullable_column = assert_cast(**column); + ColumnPtr nested_column = nullable_column.get_nested_column_ptr(); + RETURN_IF_ERROR(_align_column_nullability(&nested_column, nested_type)); + *column = ColumnNullable::create(nested_column, + nullable_column.get_null_map_column_ptr()); + return Status::OK(); + } + if ((*column)->is_nullable()) { + const auto& nullable_column = assert_cast(**column); + if (nullable_column.has_null()) { + return Status::InternalError( + "Default expression produced NULL for non-nullable table column"); + } + ColumnPtr nested_column = nullable_column.get_nested_column_ptr(); + RETURN_IF_ERROR(_align_column_nullability(&nested_column, table_type)); + *column = nested_column; + return Status::OK(); + } + if (const auto* array_type = typeid_cast(table_type.get())) { + const auto& array_column = assert_cast(**column); + ColumnPtr nested_column = array_column.get_data_ptr(); + RETURN_IF_ERROR( + _align_column_nullability(&nested_column, array_type->get_nested_type())); + *column = ColumnArray::create(nested_column, array_column.get_offsets_ptr()); + return Status::OK(); + } + if (const auto* map_type = typeid_cast(table_type.get())) { + const auto& map_column = assert_cast(**column); + ColumnPtr key_column = map_column.get_keys_ptr(); + ColumnPtr value_column = map_column.get_values_ptr(); + RETURN_IF_ERROR(_align_column_nullability(&key_column, map_type->get_key_type())); + RETURN_IF_ERROR(_align_column_nullability(&value_column, map_type->get_value_type())); + *column = ColumnMap::create(key_column, value_column, map_column.get_offsets_ptr()); + return Status::OK(); + } + if (const auto* struct_type = typeid_cast(table_type.get())) { + const auto& struct_column = assert_cast(**column); + Columns columns = struct_column.get_columns_copy(); + DORIS_CHECK(columns.size() == struct_type->get_elements().size()); + for (size_t i = 0; i < columns.size(); ++i) { + RETURN_IF_ERROR( + _align_column_nullability(&columns[i], struct_type->get_element(i))); + } + *column = ColumnStruct::create(columns); + return Status::OK(); + } + return Status::OK(); + } + + static Status _execute_default_expr_without_root_type_check( + const VExprContextSPtr& default_expr, const Block* block, + ColumnWithTypeAndName* result_data) { + DORIS_CHECK(default_expr != nullptr); + DORIS_CHECK(block != nullptr); + DORIS_CHECK(result_data != nullptr); + ColumnPtr result_column; + Status st; + RETURN_IF_CATCH_EXCEPTION({ + st = default_expr->root()->execute_column_impl(default_expr.get(), block, nullptr, + block->rows(), result_column); + }); + RETURN_IF_ERROR(st); + DORIS_CHECK(result_column.get() != nullptr); + if (result_column->size() != block->rows()) { + return Status::InternalError( + "Default expr {} return column size {} not equal to expected size {}", + default_expr->expr_name(), result_column->size(), block->rows()); + } + result_data->column = result_column; + result_data->type = default_expr->execute_type(block); + result_data->name = default_expr->expr_name(); + return Status::OK(); + } + + Status _cast_column_to_type(ColumnPtr* column, const DataTypePtr& file_type, + const DataTypePtr& table_type, + const std::string& column_name) const { + DORIS_CHECK(column != nullptr); + DORIS_CHECK(column->get() != nullptr); + DORIS_CHECK(file_type != nullptr); + DORIS_CHECK(table_type != nullptr); + if (file_type->equals(*table_type)) { + return Status::OK(); + } + + DataTypePtr input_type = file_type; + if ((*column)->is_nullable() && !input_type->is_nullable()) { + input_type = make_nullable(input_type); + } + Block cast_block; + cast_block.insert({*column, input_type, column_name}); + auto slot_ref = VSlotRef::create_shared(0, 0, -1, input_type, column_name); + auto cast_expr = Cast::create_shared(table_type); + cast_expr->add_child(std::move(slot_ref)); + auto cast_ctx = VExprContext::create_shared(std::move(cast_expr)); + RowDescriptor row_desc; + RETURN_IF_ERROR(cast_ctx->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(cast_ctx->open(_runtime_state)); + ColumnPtr cast_column; + RETURN_IF_ERROR(cast_ctx->execute(&cast_block, cast_column)); + *column = std::move(cast_column); + return Status::OK(); + } + + Status _materialize_present_child_mapping_column(const ColumnMapping& mapping, + const ColumnPtr& file_column, + const size_t rows, ColumnPtr* column) { + DORIS_CHECK(column != nullptr); + DORIS_CHECK(mapping.file_type != nullptr); + DORIS_CHECK(mapping.table_type != nullptr); + *column = file_column; + if (!mapping.is_trivial) { + if (!mapping.child_mappings.empty()) { + RETURN_IF_ERROR( + _materialize_complex_mapping_column(mapping, *column, rows, column)); + } else { + RETURN_IF_ERROR(_cast_column_to_type(column, mapping.file_type, mapping.table_type, + mapping.file_column_name)); + } + } + RETURN_IF_ERROR(_align_column_nullability(column, mapping.table_type)); + return Status::OK(); + } + + Status _materialize_mapping_column(const ColumnMapping& mapping, Block* current_block, + const size_t rows, ColumnPtr* column) { + if (!mapping.is_trivial && mapping.file_local_id.has_value() && + !mapping.child_mappings.empty()) { + DCHECK(mapping.projection != nullptr); + int res_id; + auto st = mapping.projection->execute(current_block, &res_id); + if (!st.ok()) { + return Status::InternalError( + "Failed to execute complex mapping projection for table column '{}' " + "(global_index={}, file_local_id={}, rows={}): {}, mapping={}", + mapping.table_column_name, mapping.global_index.value(), + *mapping.file_local_id, rows, st.to_string(), mapping.debug_string()); + } + ColumnPtr result_column = current_block->get_by_position(res_id).column; + RETURN_IF_ERROR( + _materialize_complex_mapping_column(mapping, result_column, rows, column)); + return Status::OK(); + } + if (mapping.projection != nullptr) { + int res_id; + auto st = mapping.projection->execute(current_block, &res_id); + if (!st.ok()) { + std::string file_local_id = "null"; + if (mapping.file_local_id.has_value()) { + file_local_id = std::to_string(*mapping.file_local_id); + } + return Status::InternalError( + "Failed to execute mapping projection for table column '{}' " + "(global_index={}, file_local_id={}, rows={}): {}, mapping={}", + mapping.table_column_name, mapping.global_index.value(), file_local_id, + rows, st.to_string(), mapping.debug_string()); + } + ColumnPtr result_column = current_block->get_by_position(res_id).column; + *column = _detach_column(std::move(result_column)); + return Status::OK(); + } + if (mapping.default_expr != nullptr) { + if (current_block->rows() == rows) { + ColumnWithTypeAndName result; + RETURN_IF_ERROR(_execute_default_expr_without_root_type_check( + mapping.default_expr, current_block, &result)); + ColumnPtr result_column = result.column; + RETURN_IF_ERROR(_align_column_nullability(&result_column, mapping.table_type)); + *column = _detach_column(std::move(result_column)); + } else { + DORIS_CHECK(mapping.constant_index.has_value()); + Block eval_block; + eval_block.insert({mapping.table_type->create_column_const_with_default_value(rows), + mapping.table_type, "__table_reader_const_rows"}); + ColumnWithTypeAndName result; + RETURN_IF_ERROR(_execute_default_expr_without_root_type_check( + mapping.default_expr, &eval_block, &result)); + ColumnPtr result_column = result.column; + RETURN_IF_ERROR(_align_column_nullability(&result_column, mapping.table_type)); + *column = _detach_column(std::move(result_column)); + } + return Status::OK(); + } + ColumnPtr result_column = mapping.table_type->create_column_const_with_default_value(rows); + *column = _detach_column(std::move(result_column)); + return Status::OK(); + } + + Status _materialize_complex_mapping_column(const ColumnMapping& mapping, + const ColumnPtr& file_column, const size_t rows, + ColumnPtr* column) { + DORIS_CHECK(mapping.table_type != nullptr); + DORIS_CHECK(file_column.get() != nullptr); + const auto table_type = remove_nullable(mapping.table_type); + switch (table_type->get_primitive_type()) { + case TYPE_STRUCT: + RETURN_IF_ERROR(_materialize_struct_mapping_column(mapping, file_column, rows, column)); + break; + case TYPE_ARRAY: + RETURN_IF_ERROR(_materialize_array_mapping_column(mapping, file_column, rows, column)); + break; + case TYPE_MAP: + RETURN_IF_ERROR(_materialize_map_mapping_column(mapping, file_column, rows, column)); + break; + default: + *column = _detach_column(file_column); + break; + } + return Status::OK(); + } + + static std::vector _present_child_mappings_in_file_order( + const std::vector& child_mappings) { + std::vector result; + result.reserve(child_mappings.size()); + for (const auto& child_mapping : child_mappings) { + if (child_mapping.file_local_id.has_value()) { + result.push_back(&child_mapping); + } + } + std::ranges::sort(result, [](const ColumnMapping* lhs, const ColumnMapping* rhs) { + DORIS_CHECK(lhs->file_local_id.has_value()); + DORIS_CHECK(rhs->file_local_id.has_value()); + return *lhs->file_local_id < *rhs->file_local_id; + }); + return result; + } + + static size_t _file_child_ordinal_for_mapping( + const ColumnMapping& mapping, const ColumnMapping& child_mapping, + const std::vector& file_ordered_children) { + DORIS_CHECK(child_mapping.file_local_id.has_value()); + if (!mapping.projected_file_children.empty()) { + const auto child_it = std::ranges::find_if( + mapping.projected_file_children, [&](const ColumnDefinition& file_child) { + return file_child.file_local_id() == *child_mapping.file_local_id; + }); + DORIS_CHECK(child_it != mapping.projected_file_children.end()); + return static_cast( + std::distance(mapping.projected_file_children.begin(), child_it)); + } + const auto child_it = std::ranges::find(file_ordered_children, &child_mapping); + DORIS_CHECK(child_it != file_ordered_children.end()); + return static_cast(std::distance(file_ordered_children.begin(), child_it)); + } + + static std::vector _child_mappings_in_table_type_order( + const ColumnMapping& mapping, const DataTypeStruct& table_type) { + std::vector result; + result.reserve(mapping.child_mappings.size()); + for (size_t child_idx = 0; child_idx < table_type.get_elements().size(); ++child_idx) { + const auto& child_name = table_type.get_element_name(child_idx); + const auto child_it = std::ranges::find_if( + mapping.child_mappings, [&](const ColumnMapping& child_mapping) { + return child_mapping.table_column_name == child_name; + }); + DORIS_CHECK(child_it != mapping.child_mappings.end()) + << mapping.debug_string() << ", table_child_name=" << child_name; + result.push_back(&*child_it); + } + return result; + } + + static const IColumn* _nested_column_if_nullable(const ColumnPtr& column, + const NullMap** null_map) { + DORIS_CHECK(column.get() != nullptr); + if (const auto* nullable_column = check_and_get_column(*column)) { + if (null_map != nullptr) { + *null_map = &nullable_column->get_null_map_data(); + } + return &nullable_column->get_nested_column(); + } + return column.get(); + } + + Status _materialize_struct_mapping_column(const ColumnMapping& mapping, + const ColumnPtr& file_column, const size_t rows, + ColumnPtr* column) { + DORIS_CHECK(mapping.table_type != nullptr); + const auto* table_type = + assert_cast(remove_nullable(mapping.table_type).get()); + const auto full_file_column = file_column->convert_to_full_column_if_const(); + const NullMap* parent_null_map = nullptr; + const auto* nested_file_column = + _nested_column_if_nullable(full_file_column, &parent_null_map); + const auto* file_struct = assert_cast(nested_file_column); + DORIS_CHECK(table_type->get_elements().size() == mapping.child_mappings.size()); + + Columns child_columns; + child_columns.reserve(mapping.child_mappings.size()); + const auto file_ordered_children = + _present_child_mappings_in_file_order(mapping.child_mappings); + const auto table_ordered_children = + _child_mappings_in_table_type_order(mapping, *table_type); + for (const auto* child_mapping : table_ordered_children) { + DORIS_CHECK(child_mapping != nullptr); + if (!child_mapping->file_local_id.has_value()) { + child_columns.push_back( + child_mapping->table_type->create_column_const_with_default_value(rows) + ->convert_to_full_column_if_const()); + continue; + } + const auto file_child_idx = + _file_child_ordinal_for_mapping(mapping, *child_mapping, file_ordered_children); + DORIS_CHECK(file_child_idx < file_struct->get_columns().size()); + ColumnPtr child_column = file_struct->get_column_ptr(file_child_idx); + RETURN_IF_ERROR(_materialize_present_child_mapping_column(*child_mapping, child_column, + rows, &child_column)); + child_columns.push_back(std::move(child_column)); + } + MutableColumns mutable_child_columns; + mutable_child_columns.reserve(child_columns.size()); + for (auto& child_column : child_columns) { + mutable_child_columns.push_back(IColumn::mutate(std::move(child_column))); + } + auto result = ColumnStruct::create(std::move(mutable_child_columns)); + if (mapping.table_type->is_nullable()) { + auto null_map = ColumnUInt8::create(); + auto& null_map_data = null_map->get_data(); + null_map_data.resize(rows); + if (parent_null_map != nullptr) { + DORIS_CHECK(parent_null_map->size() == rows); + null_map_data.assign(parent_null_map->begin(), parent_null_map->end()); + } else { + std::fill(null_map_data.begin(), null_map_data.end(), 0); + } + *column = ColumnNullable::create(std::move(result), std::move(null_map)); + } else { + *column = std::move(result); + } + return Status::OK(); + } + + Status _materialize_array_mapping_column(const ColumnMapping& mapping, + const ColumnPtr& file_column, const size_t rows, + ColumnPtr* column) { + DORIS_CHECK(mapping.child_mappings.size() == 1); + const auto full_file_column = file_column->convert_to_full_column_if_const(); + const NullMap* parent_null_map = nullptr; + const auto* nested_file_column = + _nested_column_if_nullable(full_file_column, &parent_null_map); + const auto* file_array = assert_cast(nested_file_column); + ColumnPtr nested_column = file_array->get_data_ptr(); + const auto& element_mapping = mapping.child_mappings[0]; + RETURN_IF_ERROR(_materialize_present_child_mapping_column( + element_mapping, nested_column, nested_column->size(), &nested_column)); + auto offsets_column = file_array->get_offsets_ptr()->convert_to_full_column_if_const(); + auto result = ColumnArray::create(IColumn::mutate(std::move(nested_column)), + IColumn::mutate(std::move(offsets_column))); + if (mapping.table_type->is_nullable()) { + auto null_map = ColumnUInt8::create(); + auto& null_map_data = null_map->get_data(); + null_map_data.resize(rows); + if (parent_null_map != nullptr) { + DORIS_CHECK(parent_null_map->size() == rows); + null_map_data.assign(parent_null_map->begin(), parent_null_map->end()); + } else { + std::fill(null_map_data.begin(), null_map_data.end(), 0); + } + *column = ColumnNullable::create(std::move(result), std::move(null_map)); + } else { + *column = std::move(result); + } + return Status::OK(); + } + + Status _materialize_map_mapping_column(const ColumnMapping& mapping, + const ColumnPtr& file_column, const size_t rows, + ColumnPtr* column) { + const auto full_file_column = file_column->convert_to_full_column_if_const(); + const NullMap* parent_null_map = nullptr; + const auto* nested_file_column = + _nested_column_if_nullable(full_file_column, &parent_null_map); + const auto* file_map = assert_cast(nested_file_column); + ColumnPtr key_column = file_map->get_keys_ptr(); + ColumnPtr value_column = file_map->get_values_ptr(); + + const ColumnMapping* key_mapping = nullptr; + const ColumnMapping* value_mapping = nullptr; + for (const auto& child_mapping : mapping.child_mappings) { + if (!child_mapping.file_local_id.has_value()) { + continue; + } + if (*child_mapping.file_local_id == 0) { + key_mapping = &child_mapping; + } else if (*child_mapping.file_local_id == 1) { + value_mapping = &child_mapping; + } + } + + if (key_mapping != nullptr) { + RETURN_IF_ERROR(_materialize_present_child_mapping_column( + *key_mapping, key_column, key_column->size(), &key_column)); + } + if (value_mapping != nullptr) { + RETURN_IF_ERROR(_materialize_present_child_mapping_column( + *value_mapping, value_column, value_column->size(), &value_column)); + } + auto offsets_column = file_map->get_offsets_ptr()->convert_to_full_column_if_const(); + auto result = ColumnMap::create(IColumn::mutate(std::move(key_column)), + IColumn::mutate(std::move(value_column)), + IColumn::mutate(std::move(offsets_column))); + if (mapping.table_type->is_nullable()) { + auto null_map = ColumnUInt8::create(); + auto& null_map_data = null_map->get_data(); + null_map_data.resize(rows); + if (parent_null_map != nullptr) { + DORIS_CHECK(parent_null_map->size() == rows); + null_map_data.assign(parent_null_map->begin(), parent_null_map->end()); + } else { + std::fill(null_map_data.begin(), null_map_data.end(), 0); + } + *column = ColumnNullable::create(std::move(result), std::move(null_map)); + } else { + *column = std::move(result); + } + return Status::OK(); + } + + Status _open_mapping_exprs() { + RowDescriptor row_desc; + for (const auto& mapping : _data_reader.column_mapper->mappings()) { + if (mapping.projection != nullptr) { + RETURN_IF_ERROR(mapping.projection->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(mapping.projection->open(_runtime_state)); + } + if (mapping.default_expr != nullptr) { + RETURN_IF_ERROR(mapping.default_expr->prepare(_runtime_state, row_desc)); + RETURN_IF_ERROR(mapping.default_expr->open(_runtime_state)); + } + } + return Status::OK(); + } + + Status _build_file_aggregate_request(TPushAggOp::type agg_type, + FileAggregateRequest* request) const { + DORIS_CHECK(request != nullptr); + DORIS_CHECK(_supports_aggregate_pushdown(agg_type)); + request->agg_type = agg_type; + request->columns.clear(); + if (agg_type == TPushAggOp::type::COUNT) { + // COUNT pushdown historically meant COUNT(*) and therefore carried no columns. For + // complex COUNT(col), materializing the full MAP/LIST/STRUCT value only to test the + // top-level NULL bit can be extremely expensive. When the scan projects exactly one + // directly-mapped complex column, pass that file column to the reader so formats such + // as Parquet can count the column shape from metadata/levels without decoding payload + // values like MAP value strings. Other COUNT cases stay on the existing row-count path + // to avoid changing count(*) semantics. + if (_data_reader.column_mapper->mappings().size() == 1) { + const auto& mapping = _data_reader.column_mapper->mappings()[0]; + if (mapping.file_local_id.has_value() && mapping.file_type != nullptr && + is_complex_type(remove_nullable(mapping.file_type)->get_primitive_type()) && + mapping.virtual_column_type == TableVirtualColumnType::INVALID && + mapping.default_expr == nullptr) { + FileAggregateRequest::Column column; + column.projection = + LocalColumnIndex::top_level(LocalColumnId(*mapping.file_local_id)); + request->columns.push_back(std::move(column)); + } + } + return Status::OK(); + } + request->columns.reserve(_data_reader.column_mapper->mappings().size()); + for (const auto& mapping : _data_reader.column_mapper->mappings()) { + DORIS_CHECK(mapping.file_local_id.has_value()); + FileAggregateRequest::Column column; + column.projection = LocalColumnIndex::top_level(LocalColumnId(*mapping.file_local_id)); + if (!mapping.child_mappings.empty()) { + RETURN_IF_ERROR(build_aggregate_projection(mapping, &column.projection)); + } + request->columns.push_back(std::move(column)); + } + return Status::OK(); + } + + Status _materialize_aggregate_pushdown_rows(TPushAggOp::type agg_type, + const FileAggregateResult& file_result, + Block* block) { + if (agg_type == TPushAggOp::type::COUNT) { + // COUNT pushdown is not a final count value. It emits `count` default rows so the + // upper COUNT(*) aggregate can count them and produce the final result, including + // zero rows when count is 0. + DORIS_CHECK(file_result.count >= 0); + return _materialize_count_rows(cast_set(file_result.count), block); + } + // MIN/MAX pushdown emits two rows, min first and max second, for each projected column. + // The upper MIN/MAX aggregate consumes those two rows to produce the final aggregate value. + DORIS_CHECK(file_result.columns.size() == _data_reader.column_mapper->mappings().size()); + DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size()); + Block file_block; + file_block.reserve(_data_reader.file_block_layout.size()); + for (const auto& column : _data_reader.file_block_layout) { + file_block.insert({column.type->create_column(), column.type, column.name}); + } + for (size_t column_idx = 0; column_idx < file_result.columns.size(); ++column_idx) { + const auto& result_column = file_result.columns[column_idx]; + if (!result_column.has_min || !result_column.has_max) { + return Status::NotSupported("Missing min/max aggregate result for column {}", + _projected_columns[column_idx].name); + } + bool found_file_column = false; + for (size_t block_position = 0; block_position < _data_reader.file_block_layout.size(); + ++block_position) { + if (_data_reader.file_block_layout[block_position].file_column_id == + file_result.columns[column_idx].projection.column_id()) { + found_file_column = true; + auto column = file_block.get_by_position(block_position) + .type->create_column() + ->assert_mutable(); + RETURN_IF_ERROR(_insert_aggregate_projection_value( + file_result.columns[column_idx].projection, result_column.min_value, + column.get())); + RETURN_IF_ERROR(_insert_aggregate_projection_value( + file_result.columns[column_idx].projection, result_column.max_value, + column.get())); + file_block.replace_by_position(block_position, std::move(column)); + break; + } + } + DORIS_CHECK(found_file_column); + } + for (size_t column_idx = 0; column_idx < _data_reader.column_mapper->mappings().size(); + ++column_idx) { + ColumnPtr table_column; + RETURN_IF_ERROR( + _materialize_mapping_column(_data_reader.column_mapper->mappings()[column_idx], + &file_block, 2, &table_column)); + block->replace_by_position(column_idx, std::move(table_column)); + } + return Status::OK(); + } + + struct FileBlockColumn { + LocalColumnId file_column_id = LocalColumnId::invalid(); + std::string name; + DataTypePtr type; + }; + + struct DataReader { + std::unique_ptr reader; + std::unique_ptr column_mapper; + // Schema of the data file, also including virtual column (row position). + std::vector file_schema; + // Layout of the block returned by file reader, determined by column mapping and file + // schema. It is used for file reader to materialize columns into correct type and position. + std::vector file_block_layout; + Block block_template; + }; + DataReader _data_reader; + std::vector _projected_columns; + std::unique_ptr _current_task; + std::optional _current_file_description; + // Range-level compression has higher priority than scan-param compression. TVF/load can keep + // the logical format as CSV/TEXT while carrying the concrete compression such as GZ or LZO on + // each TFileRangeDesc, matching the old FileScanner reader contract. + TFileCompressType::type _current_range_compress_type = TFileCompressType::UNKNOWN; + std::optional _current_range_load_id; + TFileRangeDesc _current_file_range_desc; + std::shared_ptr _system_properties; + // partition key -> value + std::map _partition_values; + // Predicates built from scan conjuncts before file-level localization. + std::vector _table_filters; + TableColumnPredicates _table_column_predicates; + VExprContextSPtrs _conjuncts; + ReadProfile _profile; + // Parsed from row-position based delete files, including position delete and deletion vector. + DeleteRows* _delete_rows = nullptr; + TFileScanRangeParams* _scan_params; + std::shared_ptr _io_ctx; + RuntimeState* _runtime_state; + RuntimeProfile* _scanner_profile; + const std::vector* _file_slot_descs = nullptr; + FileFormat _format; + TPushAggOp::type _push_down_agg_type = TPushAggOp::type::NONE; + size_t _batch_size = 0; + uint64_t _condition_cache_digest = 0; + segment_v2::ConditionCache::ExternalCacheKey _condition_cache_key; + std::shared_ptr> _condition_cache; + std::shared_ptr _condition_cache_ctx; + int64_t _condition_cache_hit_count = 0; + bool _current_reader_reached_eof = false; + int64_t _remaining_table_level_count = -1; + std::optional _global_rowid_context; + bool _aggregate_pushdown_tried = false; + TableColumnMapperOptions _mapper_options; + +private: + static const ColumnDefinition* _find_column_definition( + const std::vector& schema, LocalColumnId column_id) { + for (const auto& field : schema) { + if (field.file_local_id() == column_id.value()) { + return &field; + } + } + return nullptr; + } + + static bool _can_push_down_minmax_for_mapping(const ColumnMapping& mapping) { + if (mapping.child_mappings.empty()) { + return true; + } + const auto primitive_type = remove_nullable(mapping.file_type)->get_primitive_type(); + if (primitive_type != TYPE_STRUCT) { + return false; + } + size_t mapped_children = 0; + const ColumnMapping* mapped_child = nullptr; + for (const auto& child_mapping : mapping.child_mappings) { + if (!child_mapping.file_local_id.has_value()) { + continue; + } + ++mapped_children; + mapped_child = &child_mapping; + } + return mapped_children == 1 && mapped_child != nullptr && + _can_push_down_minmax_for_mapping(*mapped_child); + } + + static Status build_aggregate_projection(const ColumnMapping& mapping, + LocalColumnIndex* projection) { + DORIS_CHECK(projection != nullptr); + DORIS_CHECK(mapping.file_local_id.has_value()); + *projection = LocalColumnIndex::local(*mapping.file_local_id); + projection->children.clear(); + projection->project_all_children = true; + if (mapping.child_mappings.empty()) { + return Status::OK(); + } + projection->project_all_children = false; + for (const auto& child_mapping : mapping.child_mappings) { + if (!child_mapping.file_local_id.has_value()) { + continue; + } + LocalColumnIndex child_projection; + RETURN_IF_ERROR(build_aggregate_projection(child_mapping, &child_projection)); + projection->children.push_back(std::move(child_projection)); + } + DORIS_CHECK(projection->children.size() == 1); + return Status::OK(); + } + + static Status _insert_aggregate_projection_value(const LocalColumnIndex& projection, + const Field& value, IColumn* column) { + DORIS_CHECK(column != nullptr); + if (auto* nullable_column = check_and_get_column(*column)) { + RETURN_IF_ERROR(_insert_aggregate_projection_value( + projection, value, &nullable_column->get_nested_column())); + nullable_column->get_null_map_data().push_back(0); + return Status::OK(); + } + if (projection.project_all_children || projection.children.empty()) { + column->insert(value); + return Status::OK(); + } + auto* struct_column = assert_cast(column); + DORIS_CHECK(projection.children.size() == 1); + const auto& child_projection = projection.children[0]; + DORIS_CHECK(struct_column->get_columns().size() == 1); + RETURN_IF_ERROR(_insert_aggregate_projection_value(child_projection, value, + &struct_column->get_column(0))); + return Status::OK(); + } + + // Parse row-position deletes from table format specific parameters, and fill in _delete_rows. + Status _parse_delete_predicates(const SplitReadOptions& options); +}; + +} // namespace doris::format diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp index 553cdc4460e15c..9610bc028595ec 100644 --- a/be/src/io/file_factory.cpp +++ b/be/src/io/file_factory.cpp @@ -57,21 +57,20 @@ namespace doris { constexpr std::string_view RANDOM_CACHE_BASE_PATH = "random"; -io::FileReaderOptions FileFactory::get_reader_options(RuntimeState* state, +io::FileReaderOptions FileFactory::get_reader_options(const TQueryOptions& option, const io::FileDescription& fd) { io::FileReaderOptions opts { .cache_base_path {}, .file_size = fd.file_size, .mtime = fd.mtime, }; - if (config::enable_file_cache && state != nullptr && - state->query_options().__isset.enable_file_cache && - state->query_options().enable_file_cache && fd.file_cache_admission) { + if (config::enable_file_cache && option.__isset.enable_file_cache && option.enable_file_cache && + fd.file_cache_admission) { opts.cache_type = io::FileCachePolicy::FILE_BLOCK_CACHE; } - if (state != nullptr && state->query_options().__isset.file_cache_base_path && - state->query_options().file_cache_base_path != RANDOM_CACHE_BASE_PATH) { - opts.cache_base_path = state->query_options().file_cache_base_path; + if (option.__isset.file_cache_base_path && + option.file_cache_base_path != RANDOM_CACHE_BASE_PATH) { + opts.cache_base_path = option.file_cache_base_path; } return opts; } diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h index 7d662e4fdde469..33595313b921b1 100644 --- a/be/src/io/file_factory.h +++ b/be/src/io/file_factory.h @@ -16,6 +16,7 @@ // under the License. #pragma once +#include #include #include #include @@ -64,6 +65,8 @@ struct FileDescription { // -1 means unset. // If the file length is not set, the file length will be fetched from the file system. int64_t file_size = -1; + int64_t range_start_offset = 0; + int64_t range_size = -1; // modification time of this file. // 0 means unset. int64_t mtime = 0; @@ -83,7 +86,7 @@ class FileFactory { ENABLE_FACTORY_CREATOR(FileFactory); public: - static io::FileReaderOptions get_reader_options(RuntimeState* state, + static io::FileReaderOptions get_reader_options(const TQueryOptions& option, const io::FileDescription& fd); /// Create a temporary FileSystem for accessing file corresponding to `file_description` diff --git a/be/src/io/io_common.h b/be/src/io/io_common.h index 36b20517afb87c..566e376219efab 100644 --- a/be/src/io/io_common.h +++ b/be/src/io/io_common.h @@ -97,6 +97,10 @@ struct IOContext { // if `is_warmup` == true, this I/O request is from a warm up task bool is_warmup {false}; int64_t condition_cache_filtered_rows = 0; + // Rows removed by file-local predicate conjuncts inside FileReader/TableReader. Scanner-level + // output filtering already records its own unselected rows; this counter carries the rows that + // were filtered before the block returned to Scanner. + int64_t predicate_filtered_rows = 0; }; } // namespace io diff --git a/be/src/storage/segment/condition_cache.h b/be/src/storage/segment/condition_cache.h index 511b9c56abac5e..a189312ee1427a 100644 --- a/be/src/storage/segment/condition_cache.h +++ b/be/src/storage/segment/condition_cache.h @@ -26,6 +26,7 @@ #include #include #include +#include #include "common/config.h" #include "common/status.h" @@ -38,7 +39,19 @@ #include "util/slice.h" #include "util/time.h" -namespace doris::segment_v2 { +namespace doris { + +// Context passed from scan/table-reader layers to physical readers for condition cache +// integration. On MISS, readers set filter_result[granule] to true when row-level predicates keep +// at least one row in that granule. On HIT, readers skip granules whose cached bit is false. +struct ConditionCacheContext { + bool is_hit = false; + std::shared_ptr> filter_result; // per-granule: true = has surviving rows + int64_t base_granule = 0; // global granule index of filter_result[0] + static constexpr int GRANULE_SIZE = 2048; +}; + +namespace segment_v2 { class ConditionCacheHandle; @@ -167,4 +180,5 @@ class ConditionCacheHandle { DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle); }; -} // namespace doris::segment_v2 +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h index b230ac67f4778e..de9030b5b3a7c7 100644 --- a/be/src/util/jni-util.h +++ b/be/src/util/jni-util.h @@ -606,6 +606,14 @@ class Object { bool uninitialized() const { return _obj == nullptr; } + void reset(JNIEnv* env) { + if (_obj == nullptr) { + return; + } + RefHelper::destroy(env, _obj); + _obj = nullptr; + } + template bool equal(JNIEnv* env, const Object& other) { DCHECK(!uninitialized()); diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 2edcff5eef87c9..95d2a435d8d00e 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -30,6 +30,7 @@ file(GLOB_RECURSE UT_FILES CONFIGURE_DEPENDS exec/*.cpp exprs/*.cpp format/*.cpp + format_v2/*.cpp gutil/*.cpp io/*.cpp load/*.cpp diff --git a/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp b/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp new file mode 100644 index 00000000000000..69cf458e2fdc5f --- /dev/null +++ b/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp @@ -0,0 +1,1852 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/column/column_decimal.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_date_or_datetime_v2.h" +#include "core/data_type/data_type_decimal.h" +#include "core/data_type/data_type_nothing.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_time.h" +#include "core/data_type/data_type_timestamptz.h" +#include "core/data_type_serde/decoded_column_view.h" +#include "core/field.h" +#include "core/string_ref.h" +#include "core/value/timestamptz_value.h" +#include "util/timezone_utils.h" + +namespace doris { +namespace { + +struct ReadColumnResult { + Status status; + MutableColumnPtr column; +}; + +template +DecodedColumnView make_fixed_view(DecodedValueKind kind, const std::vector& values, + const std::vector* null_map = nullptr) { + DecodedColumnView view; + view.value_kind = kind; + view.row_count = null_map != nullptr ? static_cast(null_map->size()) + : static_cast(values.size()); + view.values = values.empty() ? nullptr : reinterpret_cast(values.data()); + view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data(); + return view; +} + +DecodedColumnView make_binary_view(DecodedValueKind kind, const std::vector& values, + int fixed_length = -1, + const std::vector* null_map = nullptr) { + DecodedColumnView view; + view.value_kind = kind; + view.row_count = null_map != nullptr ? static_cast(null_map->size()) + : static_cast(values.size()); + view.binary_values = values.empty() ? nullptr : &values; + view.fixed_length = fixed_length; + view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data(); + return view; +} + +DecodedColumnView make_bool_view(const std::vector& values, + const std::vector* null_map = nullptr) { + DecodedColumnView view; + view.value_kind = DecodedValueKind::BOOL; + view.row_count = null_map != nullptr ? static_cast(null_map->size()) + : static_cast(values.size()); + view.values = values.empty() ? nullptr : reinterpret_cast(values.data()); + view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data(); + return view; +} + +DecodedColumnView with_logical_integer(DecodedColumnView view, int bit_width, bool is_signed) { + view.logical_integer_bit_width = bit_width; + view.logical_integer_is_signed = is_signed; + return view; +} + +ReadColumnResult read_column(const DataTypePtr& type, const DecodedColumnView& view) { + auto column = type->create_column(); + auto status = type->get_serde()->read_column_from_decoded_values(*column, view); + return {std::move(status), std::move(column)}; +} + +void expect_not_supported(const Status& status) { + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::NOT_IMPLEMENTED_ERROR, status.code()) << status; +} + +void expect_corruption(const Status& status) { + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::CORRUPTION, status.code()) << status; +} + +void expect_data_quality_error(const Status& status) { + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::DATA_QUALITY_ERROR, status.code()) << status; +} + +void expect_column_strings(const IDataType& type, const IColumn& column, + const std::vector& expected) { + ASSERT_EQ(expected.size(), column.size()); + for (size_t row = 0; row < expected.size(); ++row) { + EXPECT_EQ(expected[row], type.to_string(column, row)) << "row=" << row; + } +} + +void expect_binary_column(const IColumn& column, const std::vector& expected) { + const auto& string_column = assert_cast(column); + ASSERT_EQ(expected.size(), string_column.size()); + for (size_t row = 0; row < expected.size(); ++row) { + const auto value = string_column.get_data_at(row); + EXPECT_EQ(expected[row], std::string(value.data, value.size)) << "row=" << row; + } +} + +void expect_nullable_all_null(const IColumn& column, size_t expected_size) { + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(expected_size, nullable_column.size()); + ASSERT_EQ(expected_size, nullable_column.get_nested_column().size()); + for (size_t row = 0; row < expected_size; ++row) { + EXPECT_TRUE(nullable_column.is_null_at(row)) << "row=" << row; + } +} + +Field read_field(const DataTypePtr& type, const DecodedColumnView& view) { + Field field; + auto status = type->get_serde()->read_field_from_decoded_value(*type, &field, view); + EXPECT_TRUE(status.ok()) << status; + return field; +} + +Status read_field_status(const DataTypePtr& type, const DecodedColumnView& view) { + Field field; + return type->get_serde()->read_field_from_decoded_value(*type, &field, view); +} + +std::vector string_refs(const std::vector& values) { + std::vector refs; + refs.reserve(values.size()); + for (const auto& value : values) { + refs.emplace_back(value.data(), value.size()); + } + return refs; +} + +#pragma pack(1) +struct TestInt96Timestamp { + int64_t nanos_of_day; + int32_t julian_day; +}; +#pragma pack() + +static_assert(sizeof(TestInt96Timestamp) == 12); + +Decimal128V3 decimal128_v3(Int128 value) { + return Decimal128V3(value); +} + +Decimal256 decimal256_from_int64(int64_t value) { + return Decimal256(wide::Int256(value)); +} + +} // namespace + +// ---------------------------------------------------------------------- +// Base SerDe behavior +// ---------------------------------------------------------------------- +// These cases define the default contract for types that have not implemented decoded-value +// materialization. Batch reads must report NotSupported, and the single-field path must surface +// the same error because it is implemented by delegating to the batch reader. + +TEST(DataTypeSerDeDecodedValuesTest, BaseSerdeRejectsDecodedValues) { + auto type = std::make_shared(); + std::vector values = {1}; + auto view = make_fixed_view(DecodedValueKind::INT32, values); + + auto result = read_column(type, view); + + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + EXPECT_NE(std::string::npos, result.status.to_string().find("Nothing")); +} + +TEST(DataTypeSerDeDecodedValuesTest, BaseFieldUsesBatchReaderAndPropagatesError) { + auto type = std::make_shared(); + std::vector values = {1}; + auto view = make_fixed_view(DecodedValueKind::INT32, values); + Field field = Field::create_field(123); + + auto status = type->get_serde()->read_field_from_decoded_value(*type, &field, view); + + expect_not_supported(status); + EXPECT_EQ(TYPE_INT, field.get_type()); + EXPECT_EQ(123, field.get()); +} + +// ---------------------------------------------------------------------- +// Number SerDe happy path +// ---------------------------------------------------------------------- +// The numeric matrix verifies physical kind dispatch and the exact static_cast behavior used by +// the reader. Narrow integer overflow is intentionally locked to current C++ conversion behavior; +// if product semantics change to reject overflow, these expectations should be updated with the +// implementation change. + +TEST(DataTypeSerDeDecodedValuesTest, ReadBooleanFromBool) { + auto type = std::make_shared(); + std::vector values = {true, false, true}; + auto view = make_bool_view(values); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(3, column.size()); + EXPECT_EQ(1, column.get_element(0)); + EXPECT_EQ(0, column.get_element(1)); + EXPECT_EQ(1, column.get_element(2)); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadSignedIntegersFromInt32) { + std::vector values = {0, 1, -1, 127, -128}; + auto view = make_fixed_view(DecodedValueKind::INT32, values); + + { + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + EXPECT_EQ(0, column.get_element(0)); + EXPECT_EQ(1, column.get_element(1)); + EXPECT_EQ(-1, column.get_element(2)); + EXPECT_EQ(127, column.get_element(3)); + EXPECT_EQ(-128, column.get_element(4)); + } + { + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + for (size_t row = 0; row < values.size(); ++row) { + EXPECT_EQ(static_cast(values[row]), column.get_element(row)); + } + } + { + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + for (size_t row = 0; row < values.size(); ++row) { + EXPECT_EQ(values[row], column.get_element(row)); + } + } + { + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + for (size_t row = 0; row < values.size(); ++row) { + EXPECT_EQ(static_cast(values[row]), column.get_element(row)); + } + } + { + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + for (size_t row = 0; row < values.size(); ++row) { + EXPECT_EQ(static_cast<__int128_t>(values[row]), column.get_element(row)); + } + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadSignedIntegersFromInt64) { + std::vector values = {0, 1, -1, 127, -128}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + + auto tiny = read_column(std::make_shared(), view); + ASSERT_TRUE(tiny.status.ok()) << tiny.status; + const auto& tiny_column = assert_cast(*tiny.column); + EXPECT_EQ(127, tiny_column.get_element(3)); + EXPECT_EQ(-128, tiny_column.get_element(4)); + + auto small = read_column(std::make_shared(), view); + ASSERT_TRUE(small.status.ok()) << small.status; + const auto& small_column = assert_cast(*small.column); + EXPECT_EQ(127, small_column.get_element(3)); + EXPECT_EQ(-128, small_column.get_element(4)); + + auto integer = read_column(std::make_shared(), view); + ASSERT_TRUE(integer.status.ok()) << integer.status; + const auto& int_column = assert_cast(*integer.column); + EXPECT_EQ(127, int_column.get_element(3)); + EXPECT_EQ(-128, int_column.get_element(4)); + + auto bigint = read_column(std::make_shared(), view); + ASSERT_TRUE(bigint.status.ok()) << bigint.status; + const auto& bigint_column = assert_cast(*bigint.column); + ASSERT_EQ(values.size(), bigint_column.size()); + for (size_t row = 0; row < values.size(); ++row) { + EXPECT_EQ(values[row], bigint_column.get_element(row)); + } + + auto largeint = read_column(std::make_shared(), view); + ASSERT_TRUE(largeint.status.ok()) << largeint.status; + const auto& largeint_column = assert_cast(*largeint.column); + ASSERT_EQ(values.size(), largeint_column.size()); + for (size_t row = 0; row < values.size(); ++row) { + EXPECT_EQ(static_cast<__int128_t>(values[row]), largeint_column.get_element(row)); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadIntegersFromUnsignedSources) { + { + std::vector values = {0, 1, std::numeric_limits::max()}; + auto view = make_fixed_view(DecodedValueKind::UINT32, values); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(0, column.get_element(0)); + EXPECT_EQ(1, column.get_element(1)); + EXPECT_EQ(static_cast(std::numeric_limits::max()), + column.get_element(2)); + } + { + std::vector values = {0, 1, std::numeric_limits::max()}; + auto view = make_fixed_view(DecodedValueKind::UINT64, values); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(0, column.get_element(0)); + EXPECT_EQ(1, column.get_element(1)); + EXPECT_EQ(static_cast<__int128_t>(std::numeric_limits::max()), + column.get_element(2)); + } + { + std::vector values = {static_cast(std::numeric_limits::max())}; + auto view = make_fixed_view(DecodedValueKind::UINT64, values); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(std::numeric_limits::max(), column.get_element(0)); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadUnsignedLogicalIntegersCastsPhysicalValues) { + { + std::vector values = {0, 127, 255, 32767, 65535, -1}; + auto view = + with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 8, false); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + EXPECT_EQ(0, column.get_element(0)); + EXPECT_EQ(127, column.get_element(1)); + EXPECT_EQ(255, column.get_element(2)); + EXPECT_EQ(255, column.get_element(3)); + EXPECT_EQ(255, column.get_element(4)); + EXPECT_EQ(255, column.get_element(5)); + } + { + std::vector values = {32767, 65535, -1}; + auto view = + with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 16, false); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + EXPECT_EQ(32767, column.get_element(0)); + EXPECT_EQ(65535, column.get_element(1)); + EXPECT_EQ(65535, column.get_element(2)); + } + { + std::vector values = {-1}; + auto view = + with_logical_integer(make_fixed_view(DecodedValueKind::UINT32, values), 32, false); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(1, column.size()); + EXPECT_EQ(4294967295LL, column.get_element(0)); + } + { + std::vector values = {-1}; + auto view = + with_logical_integer(make_fixed_view(DecodedValueKind::UINT64, values), 64, false); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(1, column.size()); + EXPECT_EQ(static_cast<__int128_t>(std::numeric_limits::max()), + column.get_element(0)); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadSignedLogicalIntegersCastsPhysicalValues) { + std::vector values = {127, 128, 255, -1}; + auto view = with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 8, true); + auto result = read_column(std::make_shared(), view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + ASSERT_EQ(values.size(), column.size()); + EXPECT_EQ(static_cast(127), column.get_element(0)); + EXPECT_EQ(static_cast(-128), column.get_element(1)); + EXPECT_EQ(static_cast(-1), column.get_element(2)); + EXPECT_EQ(static_cast(-1), column.get_element(3)); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFloatAndDouble) { + { + auto type = std::make_shared(); + std::vector values = {0.0F, -0.0F, 1.5F, -2.25F}; + auto result = read_column(type, make_fixed_view(DecodedValueKind::FLOAT, values)); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_FLOAT_EQ(0.0F, column.get_element(0)); + EXPECT_TRUE(std::signbit(column.get_element(1))); + EXPECT_FLOAT_EQ(1.5F, column.get_element(2)); + EXPECT_FLOAT_EQ(-2.25F, column.get_element(3)); + } + { + auto type = std::make_shared(); + std::vector values = {0.0, -0.0, 1.5, -2.25}; + auto result = read_column(type, make_fixed_view(DecodedValueKind::DOUBLE, values)); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_DOUBLE_EQ(0.0, column.get_element(0)); + EXPECT_TRUE(std::signbit(column.get_element(1))); + EXPECT_DOUBLE_EQ(1.5, column.get_element(2)); + EXPECT_DOUBLE_EQ(-2.25, column.get_element(3)); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFloatSpecialValues) { + { + std::vector values = {std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + auto result = read_column(std::make_shared(), + make_fixed_view(DecodedValueKind::FLOAT, values)); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_TRUE(std::isnan(column.get_element(0))); + EXPECT_TRUE(std::isinf(column.get_element(1))); + EXPECT_FALSE(std::signbit(column.get_element(1))); + EXPECT_TRUE(std::isinf(column.get_element(2))); + EXPECT_TRUE(std::signbit(column.get_element(2))); + } + { + std::vector values = {std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + auto result = read_column(std::make_shared(), + make_fixed_view(DecodedValueKind::DOUBLE, values)); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_TRUE(std::isnan(column.get_element(0))); + EXPECT_TRUE(std::isinf(column.get_element(1))); + EXPECT_FALSE(std::signbit(column.get_element(1))); + EXPECT_TRUE(std::isinf(column.get_element(2))); + EXPECT_TRUE(std::signbit(column.get_element(2))); + } +} + +// ---------------------------------------------------------------------- +// Number SerDe error paths +// ---------------------------------------------------------------------- +// These cases separate unsupported physical kinds from corrupt decoded buffers. Unsupported kinds +// must not append to the destination column; missing value buffers are allowed only for empty or +// all-null batches where no non-null row can dereference the buffer. + +TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsMismatchedKind) { + struct Case { + DataTypePtr type; + DecodedValueKind kind; + }; + std::vector cases = { + {std::make_shared(), DecodedValueKind::INT32}, + {std::make_shared(), DecodedValueKind::BOOL}, + {std::make_shared(), DecodedValueKind::DOUBLE}, + {std::make_shared(), DecodedValueKind::FLOAT}, + {std::make_shared(), DecodedValueKind::BINARY}, + }; + + for (const auto& test_case : cases) { + std::vector values = {1}; + auto result = read_column(test_case.type, make_fixed_view(test_case.kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsMissingValuesWhenNonNullExists) { + auto type = std::make_shared(); + { + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT32; + view.row_count = 3; + auto result = read_column(type, view); + expect_corruption(result.status); + } + { + std::vector null_map = {1, 0, 1}; + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT32; + view.row_count = 3; + view.null_map = null_map.data(); + auto result = read_column(type, view); + expect_corruption(result.status); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, NumberAllowsMissingValuesForAllNullOrEmpty) { + auto type = std::make_shared(std::make_shared()); + { + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT32; + view.row_count = 0; + auto result = read_column(type, view); + ASSERT_TRUE(result.status.ok()) << result.status; + EXPECT_EQ(0, result.column->size()); + } + { + std::vector null_map = {1, 1, 1}; + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT32; + view.row_count = 3; + view.null_map = null_map.data(); + auto result = read_column(type, view); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + const auto& nested_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(3, nullable_column.size()); + for (size_t row = 0; row < nullable_column.size(); ++row) { + EXPECT_TRUE(nullable_column.is_null_at(row)); + EXPECT_EQ(0, nested_column.get_element(row)); + } + } +} + +TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsOutOfRangeValueInStrictMode) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {127, 128}; + std::vector null_map = {0, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + view.enable_strict_mode = true; + + auto result = read_column(type, view); + + expect_data_quality_error(result.status); + const auto& nullable_column = assert_cast(*result.column); + EXPECT_EQ(0, nullable_column.size()); + EXPECT_EQ(0, nullable_column.get_null_map_data().size()); + EXPECT_EQ(0, nullable_column.get_nested_column().size()); +} + +TEST(DataTypeSerDeDecodedValuesTest, NumberNullsOutOfRangeValueInNonStrictMode) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {127, 128, -129, -128}; + std::vector null_map = {0, 0, 0, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + const auto& nested_column = assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(4, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_TRUE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_EQ(127, nested_column.get_element(0)); + EXPECT_EQ(0, nested_column.get_element(1)); + EXPECT_EQ(0, nested_column.get_element(2)); + EXPECT_EQ(-128, nested_column.get_element(3)); +} + +TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsUnsignedOverflowInStrictMode) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {static_cast(std::numeric_limits::max()), + std::numeric_limits::max()}; + std::vector null_map = {0, 0}; + auto view = make_fixed_view(DecodedValueKind::UINT64, values, &null_map); + view.enable_strict_mode = true; + + auto result = read_column(type, view); + + expect_data_quality_error(result.status); +} + +TEST(DataTypeSerDeDecodedValuesTest, NumberNullsUnsignedOverflowInNonStrictMode) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {static_cast(std::numeric_limits::max()), + std::numeric_limits::max()}; + std::vector null_map = {0, 0}; + auto view = make_fixed_view(DecodedValueKind::UINT64, values, &null_map); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + const auto& nested_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(2, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_EQ(std::numeric_limits::max(), nested_column.get_element(0)); + EXPECT_EQ(0, nested_column.get_element(1)); +} + +// ---------------------------------------------------------------------- +// String / Binary SerDe +// ---------------------------------------------------------------------- +// String-like decoded reads must preserve exact byte sequences. The embedded-NUL case prevents +// accidental C-string truncation. Nullable string tests ensure null rows materialize default nested +// values while the outer null map remains authoritative. + +TEST(DataTypeSerDeDecodedValuesTest, ReadStringFromBinary) { + auto type = std::make_shared(); + std::vector storage = {"alpha", "", std::string("a\0b", 3), "utf8-\xe4\xb8\xad"}; + auto refs = string_refs(storage); + + auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs)); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_binary_column(*result.column, storage); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadStringFromFixedBinary) { + auto type = std::make_shared(); + std::vector storage = {std::string("\x00\x01\x02\x03", 4), + std::string("\x7f\x80\xfe\xff", 4)}; + auto refs = string_refs(storage); + + auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 4)); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_binary_column(*result.column, storage); +} + +TEST(DataTypeSerDeDecodedValuesTest, StringNullMapMaterialization) { + auto type = std::make_shared(std::make_shared()); + std::vector storage = {"alpha", "", "omega"}; + auto refs = string_refs(storage); + std::vector null_map = {0, 1, 0}; + + auto result = + read_column(type, make_binary_view(DecodedValueKind::BINARY, refs, -1, &null_map)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(3, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + expect_binary_column(nullable_column.get_nested_column(), {"alpha", "", "omega"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, StringRejectsMismatchedKind) { + auto type = std::make_shared(); + for (auto kind : {DecodedValueKind::INT32, DecodedValueKind::INT64, DecodedValueKind::DOUBLE}) { + std::vector values = {1}; + auto result = read_column(type, make_fixed_view(kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, StringRejectsMissingBinaryValuesWhenNonNullExists) { + auto type = std::make_shared(); + DecodedColumnView view; + view.value_kind = DecodedValueKind::BINARY; + view.row_count = 1; + + auto result = read_column(type, view); + + expect_corruption(result.status); +} + +TEST(DataTypeSerDeDecodedValuesTest, StringAllowsAllNullWithoutBinaryValues) { + auto type = std::make_shared(std::make_shared()); + std::vector null_map = {1, 1}; + DecodedColumnView view; + view.value_kind = DecodedValueKind::BINARY; + view.row_count = 2; + view.null_map = null_map.data(); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(2, nullable_column.size()); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + expect_binary_column(nullable_column.get_nested_column(), {"", ""}); +} + +// ---------------------------------------------------------------------- +// DateV2 SerDe +// ---------------------------------------------------------------------- +// DateV2 accepts Parquet DATE-style epoch days as INT32. Null rows insert default nested dates and +// missing buffers are rejected only when a non-null row requires a value. + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateV2FromEpochDays) { + auto type = std::make_shared(); + std::vector values = {-1, 0, 1, 18628, 18321}; + + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values)); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"1969-12-31", "1970-01-01", "1970-01-02", "2021-01-01", "2020-02-29"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, DateV2HandlesNulls) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {0, 1, 2}; + std::vector null_map = {0, 1, 0}; + + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values, &null_map)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(3, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + expect_column_strings(*type, *result.column, {"1970-01-01", "NULL", "1970-01-03"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, DateV2RejectsInvalidKind) { + auto type = std::make_shared(); + for (auto kind : + {DecodedValueKind::INT64, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) { + std::vector values = {0}; + auto result = read_column(type, make_fixed_view(kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, DateV2RejectsMissingValuesWhenNonNullExists) { + auto type = std::make_shared(); + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT32; + view.row_count = 1; + + auto result = read_column(type, view); + + expect_corruption(result.status); +} + +// ---------------------------------------------------------------------- +// DateTimeV2 SerDe +// ---------------------------------------------------------------------- +// Timestamp decoding covers INT64 micros/millis, UNKNOWN-as-micros compatibility, UTC-adjusted +// conversion with explicit/default timezones, INT96 Julian-day timestamps, and invalid buffer/kind +// errors. Negative epoch values are included to lock correct floor-division behavior. + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Micros) { + auto type = std::make_shared(6); + std::vector values = {-1, 0, 1, 1234567, 86400000000LL - 1}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MICROS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"1969-12-31 23:59:59.999999", "1970-01-01 00:00:00.000000", + "1970-01-01 00:00:00.000001", "1970-01-01 00:00:01.234567", + "1970-01-01 23:59:59.999999"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Millis) { + auto type = std::make_shared(6); + std::vector values = {-1, 0, 1, 1234}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MILLIS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"1969-12-31 23:59:59.999000", "1970-01-01 00:00:00.000000", + "1970-01-01 00:00:00.001000", "1970-01-01 00:00:01.234000"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Nanos) { + auto type = std::make_shared(6); + std::vector values = {-1000, 0, 1000, 1234567890}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::NANOS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"1969-12-31 23:59:59.999999", "1970-01-01 00:00:00.000000", + "1970-01-01 00:00:00.000001", "1970-01-01 00:00:01.234567"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UnknownUnitAsMicros) { + auto type = std::make_shared(6); + std::vector values = {1000000}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::UNKNOWN; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"1970-01-01 00:00:01.000000"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UtcAdjustedDefaultUtc) { + auto type = std::make_shared(6); + std::vector values = {0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MICROS; + view.timestamp_is_adjusted_to_utc = true; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"1970-01-01 00:00:00.000000"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UtcAdjustedWithTimezones) { + auto type = std::make_shared(6); + std::vector values = {0, -1, 1234567}; + cctz::time_zone shanghai; + cctz::time_zone new_york; + ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai)); + ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("-05:00", new_york)); + + auto shanghai_view = make_fixed_view(DecodedValueKind::INT64, values); + shanghai_view.time_unit = DecodedTimeUnit::MICROS; + shanghai_view.timestamp_is_adjusted_to_utc = true; + shanghai_view.timezone = &shanghai; + auto shanghai_result = read_column(type, shanghai_view); + ASSERT_TRUE(shanghai_result.status.ok()) << shanghai_result.status; + expect_column_strings(*type, *shanghai_result.column, + {"1970-01-01 08:00:00.000000", "1970-01-01 07:59:59.999999", + "1970-01-01 08:00:01.234567"}); + + auto new_york_view = make_fixed_view(DecodedValueKind::INT64, values); + new_york_view.time_unit = DecodedTimeUnit::MICROS; + new_york_view.timestamp_is_adjusted_to_utc = true; + new_york_view.timezone = &new_york; + auto new_york_result = read_column(type, new_york_view); + ASSERT_TRUE(new_york_result.status.ok()) << new_york_result.status; + expect_column_strings(*type, *new_york_result.column, + {"1969-12-31 19:00:00.000000", "1969-12-31 18:59:59.999999", + "1969-12-31 19:00:01.234567"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Int96) { + auto type = std::make_shared(std::make_shared(6)); + std::vector values = { + {0, 2440588}, + {86399999999000LL, 2440587}, + {0, 2440589}, + }; + std::vector null_map = {0, 0, 1}; + auto view = make_fixed_view(DecodedValueKind::INT96, values, &null_map); + cctz::time_zone shanghai; + ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai)); + view.timezone = &shanghai; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"1970-01-01 08:00:00.000000", "1970-01-01 07:59:59.999999", "NULL"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadTimestampTzInt64AsUtcInstant) { + auto type = std::make_shared(6); + // 2024-12-31 16:00:00 UTC is displayed as 2025-01-01 00:00:00+08:00. + cctz::time_zone shanghai; + ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai)); + + std::vector micros_values = {1735660800000000LL, 1735660800123456LL}; + auto micros_view = make_fixed_view(DecodedValueKind::INT64, micros_values); + micros_view.time_unit = DecodedTimeUnit::MICROS; + auto micros_result = read_column(type, micros_view); + ASSERT_TRUE(micros_result.status.ok()) << micros_result.status; + const auto& micros_column = assert_cast(*micros_result.column); + EXPECT_EQ(micros_column.get_element(0).to_string(shanghai, 6), + "2025-01-01 00:00:00.000000+08:00"); + EXPECT_EQ(micros_column.get_element(1).to_string(shanghai, 6), + "2025-01-01 00:00:00.123456+08:00"); + + std::vector millis_values = {1735660800000LL}; + auto millis_view = make_fixed_view(DecodedValueKind::INT64, millis_values); + millis_view.time_unit = DecodedTimeUnit::MILLIS; + auto millis_result = read_column(type, millis_view); + ASSERT_TRUE(millis_result.status.ok()) << millis_result.status; + const auto& millis_column = assert_cast(*millis_result.column); + EXPECT_EQ(millis_column.get_element(0).to_string(shanghai, 6), + "2025-01-01 00:00:00.000000+08:00"); + + std::vector nanos_values = {1735660800123456000LL}; + auto nanos_view = make_fixed_view(DecodedValueKind::INT64, nanos_values); + nanos_view.time_unit = DecodedTimeUnit::NANOS; + auto nanos_result = read_column(type, nanos_view); + ASSERT_TRUE(nanos_result.status.ok()) << nanos_result.status; + const auto& nanos_column = assert_cast(*nanos_result.column); + EXPECT_EQ(nanos_column.get_element(0).to_string(shanghai, 6), + "2025-01-01 00:00:00.123456+08:00"); +} + +TEST(DataTypeSerDeDecodedValuesTest, TimestampTzReadsInt96AsUtcInstant) { + auto type = std::make_shared(6); + std::vector values = {{0, 2440588}, {123456789000LL, 2440588}}; + auto view = make_fixed_view(DecodedValueKind::INT96, values); + cctz::time_zone shanghai; + ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", shanghai)); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(column.get_element(0).to_string(shanghai, 6), "1970-01-01 08:00:00.000000+08:00"); + EXPECT_EQ(column.get_element(1).to_string(shanghai, 6), "1970-01-01 08:02:03.456789+08:00"); +} + +TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsInvalidKind) { + auto type = std::make_shared(6); + for (auto kind : + {DecodedValueKind::INT32, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) { + std::vector values = {0}; + auto result = read_column(type, make_fixed_view(kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsMissingValuesWhenNonNullExists) { + auto type = std::make_shared(6); + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT64; + view.row_count = 1; + + auto result = read_column(type, view); + + expect_corruption(result.status); +} + +TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsOutOfRangeEpochWithoutAbort) { + auto type = std::make_shared(6); + std::vector values = {0, -377673580800000001LL}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MICROS; + + auto result = read_column(type, view); + + expect_data_quality_error(result.status); + EXPECT_EQ(0, result.column->size()); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableDateTimeV2RejectsOutOfRangeEpochInStrictMode) { + auto type = std::make_shared(std::make_shared(6)); + std::vector values = {0, -377673580800000001LL}; + std::vector null_map = {0, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + view.time_unit = DecodedTimeUnit::MICROS; + view.enable_strict_mode = true; + + auto result = read_column(type, view); + + expect_data_quality_error(result.status); + const auto& nullable_column = assert_cast(*result.column); + EXPECT_EQ(0, nullable_column.size()); + EXPECT_EQ(0, nullable_column.get_null_map_data().size()); + EXPECT_EQ(0, nullable_column.get_nested_column().size()); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableDateTimeV2NullsOutOfRangeEpochInNonStrictMode) { + auto type = std::make_shared(std::make_shared(6)); + std::vector values = {0, -377673580800000001LL, 1}; + std::vector null_map = {0, 0, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + view.time_unit = DecodedTimeUnit::MICROS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(3, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + expect_column_strings(*type, *result.column, + {"1970-01-01 00:00:00.000000", "NULL", "1970-01-01 00:00:00.000001"}); +} + +// ---------------------------------------------------------------------- +// TimeV2 SerDe +// ---------------------------------------------------------------------- +// TimeV2 decodes INT32 as milliseconds and INT64 according to the supplied time unit. Negative +// durations are verified because they use a sign bit in TimeValue::TimeType rather than DateTimeV2 +// epoch arithmetic. + +TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt32Millis) { + auto type = std::make_shared(6); + std::vector values = {0, 1, -1, 3661001}; + + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values)); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings( + *type, *result.column, + {"00:00:00.000000", "00:00:00.001000", "-00:00:00.001000", "01:01:01.001000"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Micros) { + auto type = std::make_shared(6); + std::vector values = {0, 1, -1, 3661000001LL}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MICROS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings( + *type, *result.column, + {"00:00:00.000000", "00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"}); + + view.time_unit = DecodedTimeUnit::UNKNOWN; + auto unknown_result = read_column(type, view); + ASSERT_TRUE(unknown_result.status.ok()) << unknown_result.status; + expect_column_strings( + *type, *unknown_result.column, + {"00:00:00.000000", "00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Millis) { + auto type = std::make_shared(6); + std::vector values = {1, -1, 3661001}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MILLIS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"00:00:00.001000", "-00:00:00.001000", "01:01:01.001000"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Nanos) { + auto type = std::make_shared(6); + std::vector values = {1000, -1000, 3661000001000LL}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::NANOS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, + {"00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, TimeV2HandlesNulls) { + auto type = std::make_shared(std::make_shared(6)); + std::vector values = {0, 1, 2}; + std::vector null_map = {0, 1, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + view.time_unit = DecodedTimeUnit::MICROS; + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(3, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + expect_column_strings(*type, *result.column, {"00:00:00.000000", "NULL", "00:00:00.000002"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, TimeV2RejectsInvalidKind) { + auto type = std::make_shared(6); + for (auto kind : {DecodedValueKind::BOOL, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) { + std::vector values = {0}; + auto result = read_column(type, make_fixed_view(kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + } +} + +// ---------------------------------------------------------------------- +// Decimal SerDe +// ---------------------------------------------------------------------- +// Decimal cases cover integer-backed values and Parquet big-endian two's-complement binary values. +// String assertions validate the user-visible scale, while direct column checks lock the native +// unscaled value for every decimal width. + +TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal32FromInt32) { + auto type = std::make_shared(9, 2); + std::vector values = {12345, -67, 0}; + auto view = make_fixed_view(DecodedValueKind::INT32, values); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(Decimal32(12345), column.get_element(0)); + EXPECT_EQ(Decimal32(-67), column.get_element(1)); + EXPECT_EQ(Decimal32(0), column.get_element(2)); + expect_column_strings(*type, *result.column, {"123.45", "-0.67", "0.00"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal64FromInt64) { + auto type = std::make_shared(18, 4); + std::vector values = {123456789, -1}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(Decimal64(123456789), column.get_element(0)); + EXPECT_EQ(Decimal64(-1), column.get_element(1)); + expect_column_strings(*type, *result.column, {"12345.6789", "-0.0001"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal128FromInt32AndInt64) { + auto type = std::make_shared(38, 6); + { + std::vector values = {123456, -1}; + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values)); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(decimal128_v3(123456), column.get_element(0)); + EXPECT_EQ(decimal128_v3(-1), column.get_element(1)); + expect_column_strings(*type, *result.column, {"0.123456", "-0.000001"}); + } + { + std::vector values = {1234567890123LL, -1234567LL}; + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT64, values)); + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(decimal128_v3(1234567890123LL), column.get_element(0)); + EXPECT_EQ(decimal128_v3(-1234567LL), column.get_element(1)); + expect_column_strings(*type, *result.column, {"1234567.890123", "-1.234567"}); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal256FromInt64) { + auto type = std::make_shared(76, 8); + std::vector values = {std::numeric_limits::max(), + std::numeric_limits::min()}; + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT64, values)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + EXPECT_EQ(decimal256_from_int64(std::numeric_limits::max()), column.get_element(0)); + EXPECT_EQ(decimal256_from_int64(std::numeric_limits::min()), column.get_element(1)); + expect_column_strings(*type, *result.column, {"92233720368.54775807", "-92233720368.54775808"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDecimalFromBinaryBigEndian) { + auto type = std::make_shared(18, 2); + std::vector storage = { + std::string("\x00", 1), std::string("\x7f", 1), std::string("\x80", 1), + std::string("\xff", 1), std::string("\xff\xbd", 2), std::string("\x30\x39", 2), + }; + auto refs = string_refs(storage); + + auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& column = assert_cast(*result.column); + std::vector expected = {decimal128_v3(0), decimal128_v3(127), + decimal128_v3(-128), decimal128_v3(-1), + decimal128_v3(-67), decimal128_v3(12345)}; + ASSERT_EQ(expected.size(), column.size()); + for (size_t row = 0; row < expected.size(); ++row) { + EXPECT_EQ(expected[row], column.get_element(row)) << "row=" << row; + } + expect_column_strings(*type, *result.column, + {"0.00", "1.27", "-1.28", "-0.01", "-0.67", "123.45"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadDecimalFromFixedBinaryLengths) { + { + auto type = std::make_shared(38, 2); + std::vector storage = {std::string("\x00", 1), std::string("\x80", 1)}; + auto refs = string_refs(storage); + auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 1)); + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"0.00", "-1.28"}); + } + { + auto type = std::make_shared(38, 2); + std::vector storage = {std::string("\xff\xbd", 2), std::string("\x30\x39", 2)}; + auto refs = string_refs(storage); + auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 2)); + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"-0.67", "123.45"}); + } + { + auto type = std::make_shared(38, 2); + std::vector storage = {std::string("\0\0\0\0\0\0\x30\x39", 8)}; + auto refs = string_refs(storage); + auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 8)); + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"123.45"}); + } + { + auto type = std::make_shared(38, 2); + std::vector storage = { + std::string("\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xbd", + 16)}; + auto refs = string_refs(storage); + auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 16)); + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"-0.67"}); + } + { + auto type = std::make_shared(76, 2); + std::vector storage = {std::string(31, '\xff') + std::string("\xbd", 1)}; + auto refs = string_refs(storage); + auto result = read_column(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 32)); + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"-0.67"}); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalHandlesNulls) { + auto type = std::make_shared(std::make_shared(18, 2)); + std::vector values = {12345, -1, -67}; + std::vector null_map = {0, 1, 0}; + + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT64, values, &null_map)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + const auto& decimal_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(3, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_EQ(decimal128_v3(12345), decimal_column.get_element(0)); + EXPECT_EQ(decimal128_v3(0), decimal_column.get_element(1)); + EXPECT_EQ(decimal128_v3(-67), decimal_column.get_element(2)); +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsOutOfRangeValueInStrictMode) { + auto type = std::make_shared(std::make_shared(9, 2)); + std::vector values = {999999999, 1000000000}; + std::vector null_map = {0, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + view.enable_strict_mode = true; + + auto result = read_column(type, view); + + expect_data_quality_error(result.status); + const auto& nullable_column = assert_cast(*result.column); + EXPECT_EQ(0, nullable_column.size()); + EXPECT_EQ(0, nullable_column.get_null_map_data().size()); + EXPECT_EQ(0, nullable_column.get_nested_column().size()); +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalNullsOutOfRangeValueInNonStrictMode) { + auto type = std::make_shared(std::make_shared(9, 2)); + std::vector values = {999999999, 1000000000, -1000000000, -999999999}; + std::vector null_map = {0, 0, 0, 0}; + auto view = make_fixed_view(DecodedValueKind::INT64, values, &null_map); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + const auto& decimal_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(4, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_TRUE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_EQ(Decimal32(999999999), decimal_column.get_element(0)); + EXPECT_EQ(Decimal32(0), decimal_column.get_element(1)); + EXPECT_EQ(Decimal32(0), decimal_column.get_element(2)); + EXPECT_EQ(Decimal32(-999999999), decimal_column.get_element(3)); +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsNullBinaryDataWithPositiveLength) { + auto type = std::make_shared(18, 2); + std::vector refs = {StringRef(static_cast(nullptr), 2)}; + + auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs)); + + expect_corruption(result.status); + EXPECT_NE(std::string::npos, result.status.to_string().find("row 0")); +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalAllowsZeroLengthBinaryAsZero) { + auto type = std::make_shared(18, 2); + std::vector refs = {StringRef(static_cast(nullptr), 0), + StringRef("", 0)}; + + auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs)); + + ASSERT_TRUE(result.status.ok()) << result.status; + expect_column_strings(*type, *result.column, {"0.00", "0.00"}); +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsInvalidKind) { + auto type = std::make_shared(18, 2); + for (auto kind : {DecodedValueKind::BOOL, DecodedValueKind::FLOAT, DecodedValueKind::DOUBLE, + DecodedValueKind::UINT64}) { + std::vector values = {0}; + auto result = read_column(type, make_fixed_view(kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsMissingBufferWhenNonNullExists) { + auto type = std::make_shared(18, 2); + { + DecodedColumnView view; + view.value_kind = DecodedValueKind::INT64; + view.row_count = 1; + auto result = read_column(type, view); + expect_corruption(result.status); + } + { + DecodedColumnView view; + view.value_kind = DecodedValueKind::BINARY; + view.row_count = 1; + auto result = read_column(type, view); + expect_corruption(result.status); + } +} + +// ---------------------------------------------------------------------- +// Nullable SerDe wrapper +// ---------------------------------------------------------------------- +// Nullable tests focus on wrapper responsibilities: copying the outer null map, inserting default +// nested values for null rows, treating a missing null_map as all non-null, appending to existing +// columns, and rolling back outer state when the nested reader rejects the input. + +TEST(DataTypeSerDeDecodedValuesTest, NullablePropagatesNullMapAndReadsNested) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {10, 20, 30, 40}; + std::vector null_map = {0, 1, 0, 1}; + + auto result = read_column(type, make_fixed_view(DecodedValueKind::INT32, values, &null_map)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + const auto& nested_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(4, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_TRUE(nullable_column.is_null_at(3)); + EXPECT_EQ(10, nested_column.get_element(0)); + EXPECT_EQ(0, nested_column.get_element(1)); + EXPECT_EQ(30, nested_column.get_element(2)); + EXPECT_EQ(0, nested_column.get_element(3)); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableWithoutNullMapReadsAllNonNull) { + auto type = std::make_shared(std::make_shared()); + std::vector storage = {"alpha", "beta"}; + auto refs = string_refs(storage); + + auto result = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs)); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(2, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + expect_binary_column(nullable_column.get_nested_column(), storage); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableAllNullDoesNotRequireNestedBuffer) { + std::vector null_map = {1, 1}; + std::vector types = { + std::make_shared(std::make_shared()), + std::make_shared(std::make_shared(18, 2)), + std::make_shared(std::make_shared()), + std::make_shared(std::make_shared()), + }; + + for (const auto& type : types) { + DecodedColumnView view; + view.value_kind = type->get_name().find("String") != std::string::npos + ? DecodedValueKind::BINARY + : DecodedValueKind::INT32; + view.row_count = 2; + view.null_map = null_map.data(); + auto result = read_column(type, view); + ASSERT_TRUE(result.status.ok()) << result.status << ", type=" << type->get_name(); + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(2, nullable_column.size()); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_EQ(2, nullable_column.get_nested_column().size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableAppendToExistingColumn) { + auto type = std::make_shared(std::make_shared()); + auto column = type->create_column(); + + std::vector first_values = {1, 2}; + auto first_status = type->get_serde()->read_column_from_decoded_values( + *column, make_fixed_view(DecodedValueKind::INT32, first_values)); + ASSERT_TRUE(first_status.ok()) << first_status; + + std::vector second_values = {10, 20, 30}; + std::vector second_null_map = {0, 1, 0}; + auto second_status = type->get_serde()->read_column_from_decoded_values( + *column, make_fixed_view(DecodedValueKind::INT32, second_values, &second_null_map)); + ASSERT_TRUE(second_status.ok()) << second_status; + + const auto& nullable_column = assert_cast(*column); + const auto& nested_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(5, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_TRUE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + EXPECT_EQ(1, nested_column.get_element(0)); + EXPECT_EQ(2, nested_column.get_element(1)); + EXPECT_EQ(10, nested_column.get_element(2)); + EXPECT_EQ(0, nested_column.get_element(3)); + EXPECT_EQ(30, nested_column.get_element(4)); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullablePropagatesNestedError) { + auto type = std::make_shared(std::make_shared()); + auto column = type->create_column(); + std::vector values = {1.0}; + std::vector null_map = {0}; + auto view = make_fixed_view(DecodedValueKind::DOUBLE, values, &null_map); + view.enable_strict_mode = true; + + auto status = type->get_serde()->read_column_from_decoded_values(*column, view); + + expect_not_supported(status); + const auto& nullable_column = assert_cast(*column); + EXPECT_EQ(0, nullable_column.size()); + EXPECT_EQ(0, nullable_column.get_null_map_data().size()); + EXPECT_EQ(0, nullable_column.get_nested_column().size()); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableNonStrictModeNullsUnsupportedDecodedKindForAllTypes) { + struct Case { + DataTypePtr type; + DecodedValueKind kind; + }; + std::vector cases = { + {std::make_shared(std::make_shared()), + DecodedValueKind::INT32}, + {std::make_shared(std::make_shared()), + DecodedValueKind::DOUBLE}, + {std::make_shared(std::make_shared()), + DecodedValueKind::FLOAT}, + {std::make_shared(std::make_shared()), + DecodedValueKind::INT64}, + {std::make_shared(std::make_shared()), + DecodedValueKind::INT64}, + {std::make_shared(std::make_shared(6)), + DecodedValueKind::DOUBLE}, + {std::make_shared(std::make_shared(6)), + DecodedValueKind::DOUBLE}, + {std::make_shared(std::make_shared(18, 2)), + DecodedValueKind::DOUBLE}, + }; + + std::vector values = {1, 2}; + for (const auto& test_case : cases) { + auto view = make_fixed_view(test_case.kind, values); + + auto result = read_column(test_case.type, view); + + ASSERT_TRUE(result.status.ok()) << result.status << ", type=" << test_case.type->get_name(); + expect_nullable_all_null(*result.column, values.size()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableStrictModeRejectsUnsupportedDecodedKind) { + auto type = std::make_shared(std::make_shared()); + std::vector values = {1.0}; + std::vector null_map = {0}; + auto view = make_fixed_view(DecodedValueKind::DOUBLE, values, &null_map); + view.enable_strict_mode = true; + + auto result = read_column(type, view); + + expect_not_supported(result.status); + const auto& nullable_column = assert_cast(*result.column); + EXPECT_EQ(0, nullable_column.size()); + EXPECT_EQ(0, nullable_column.get_null_map_data().size()); + EXPECT_EQ(0, nullable_column.get_nested_column().size()); +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableNonStrictModeNullsRowLevelDecodedConversionFailure) { + { + auto type = std::make_shared(std::make_shared()); + std::vector refs = {StringRef("ok", 2), + StringRef(static_cast(nullptr), 2), + StringRef("", 0)}; + auto view = make_binary_view(DecodedValueKind::BINARY, refs); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(3, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + expect_binary_column(nullable_column.get_nested_column(), {"ok", "", ""}); + } + { + auto type = std::make_shared(std::make_shared(18, 2)); + std::vector refs = {StringRef("\x30\x39", 2), + StringRef(static_cast(nullptr), 2)}; + auto view = make_binary_view(DecodedValueKind::BINARY, refs); + + auto result = read_column(type, view); + + ASSERT_TRUE(result.status.ok()) << result.status; + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(2, nullable_column.size()); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + expect_column_strings(*type, *result.column, {"123.45", "NULL"}); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, NullableStrictModeRejectsRowLevelDecodedConversionFailure) { + auto type = std::make_shared(std::make_shared()); + std::vector refs = {StringRef("ok", 2), + StringRef(static_cast(nullptr), 2)}; + auto view = make_binary_view(DecodedValueKind::BINARY, refs); + view.enable_strict_mode = true; + + auto result = read_column(type, view); + + expect_corruption(result.status); + const auto& nullable_column = assert_cast(*result.column); + EXPECT_EQ(0, nullable_column.size()); + EXPECT_EQ(0, nullable_column.get_null_map_data().size()); + EXPECT_EQ(0, nullable_column.get_nested_column().size()); +} + +// ---------------------------------------------------------------------- +// read_field_from_decoded_value +// ---------------------------------------------------------------------- +// The field path is used by Parquet min/max and pruning code. It must be covered independently +// because it creates a one-row column, delegates to the batch reader, and extracts a Field value. + +TEST(DataTypeSerDeDecodedValuesTest, ReadFieldPrimitiveValues) { + { + std::vector values = {true}; + auto field = read_field(std::make_shared(), make_bool_view(values)); + EXPECT_EQ(TYPE_BOOLEAN, field.get_type()); + EXPECT_TRUE(field.get()); + } + { + std::vector values = {-42}; + auto field = read_field(std::make_shared(), + make_fixed_view(DecodedValueKind::INT32, values)); + EXPECT_EQ(TYPE_INT, field.get_type()); + EXPECT_EQ(-42, field.get()); + } + { + std::vector values = {1234567890123LL}; + auto field = read_field(std::make_shared(), + make_fixed_view(DecodedValueKind::INT64, values)); + EXPECT_EQ(TYPE_BIGINT, field.get_type()); + EXPECT_EQ(1234567890123LL, field.get()); + } + { + std::vector values = {-9}; + auto field = read_field(std::make_shared(), + make_fixed_view(DecodedValueKind::INT64, values)); + EXPECT_EQ(TYPE_LARGEINT, field.get_type()); + EXPECT_EQ(static_cast<__int128_t>(-9), field.get()); + } + { + std::vector values = {std::numeric_limits::quiet_NaN()}; + auto field = read_field(std::make_shared(), + make_fixed_view(DecodedValueKind::FLOAT, values)); + EXPECT_EQ(TYPE_FLOAT, field.get_type()); + EXPECT_TRUE(std::isnan(field.get())); + } + { + std::vector values = {std::numeric_limits::infinity()}; + auto field = read_field(std::make_shared(), + make_fixed_view(DecodedValueKind::DOUBLE, values)); + EXPECT_EQ(TYPE_DOUBLE, field.get_type()); + EXPECT_TRUE(std::isinf(field.get())); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFieldLogicalIntegerCastsPhysicalValue) { + { + std::vector values = {32767}; + auto view = + with_logical_integer(make_fixed_view(DecodedValueKind::INT32, values), 8, false); + auto field = read_field(std::make_shared(), view); + EXPECT_EQ(TYPE_SMALLINT, field.get_type()); + EXPECT_EQ(255, field.get()); + } + { + std::vector values = {-1}; + auto view = + with_logical_integer(make_fixed_view(DecodedValueKind::UINT32, values), 32, false); + auto field = read_field(std::make_shared(), view); + EXPECT_EQ(TYPE_BIGINT, field.get_type()); + EXPECT_EQ(4294967295LL, field.get()); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFieldStringValues) { + auto type = std::make_shared(); + std::vector storage = {std::string("a\0b", 3)}; + auto refs = string_refs(storage); + auto field = read_field(type, make_binary_view(DecodedValueKind::BINARY, refs)); + EXPECT_EQ(TYPE_STRING, field.get_type()); + EXPECT_EQ(std::string("a\0b", 3), field.get()); + + std::vector fixed_storage = {std::string("\x00\x01\x02\x03", 4)}; + auto fixed_refs = string_refs(fixed_storage); + auto fixed_field = + read_field(type, make_binary_view(DecodedValueKind::FIXED_BINARY, fixed_refs, 4)); + EXPECT_EQ(TYPE_STRING, fixed_field.get_type()); + EXPECT_EQ(std::string("\x00\x01\x02\x03", 4), fixed_field.get()); +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFieldDateTimeAndTimeValues) { + { + auto type = std::make_shared(); + std::vector values = {18628}; + auto field = read_field(type, make_fixed_view(DecodedValueKind::INT32, values)); + EXPECT_EQ(TYPE_DATEV2, field.get_type()); + EXPECT_EQ("2021-01-01", field.to_debug_string(0)); + } + { + auto type = std::make_shared(6); + std::vector values = {1234567}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MICROS; + auto field = read_field(type, view); + EXPECT_EQ(TYPE_DATETIMEV2, field.get_type()); + EXPECT_EQ("1970-01-01 00:00:01.234567", field.to_debug_string(6)); + } + { + auto type = std::make_shared(6); + std::vector values = {1234}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MILLIS; + auto field = read_field(type, view); + EXPECT_EQ(TYPE_DATETIMEV2, field.get_type()); + EXPECT_EQ("1970-01-01 00:00:01.234000", field.to_debug_string(6)); + } + { + auto type = std::make_shared(6); + std::vector values = {{0, 2440588}}; + auto field = read_field(type, make_fixed_view(DecodedValueKind::INT96, values)); + EXPECT_EQ(TYPE_DATETIMEV2, field.get_type()); + EXPECT_EQ("1970-01-01 00:00:00.000000", field.to_debug_string(6)); + } + { + auto type = std::make_shared(6); + std::vector values = {3661000001LL}; + auto view = make_fixed_view(DecodedValueKind::INT64, values); + view.time_unit = DecodedTimeUnit::MICROS; + auto field = read_field(type, view); + EXPECT_EQ(TYPE_TIMEV2, field.get_type()); + auto column = type->create_column(); + column->insert(field); + expect_column_strings(*type, *column, {"01:01:01.000001"}); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFieldDecimalValues) { + { + auto type = std::make_shared(9, 2); + std::vector values = {12345}; + auto field = read_field(type, make_fixed_view(DecodedValueKind::INT32, values)); + EXPECT_EQ(TYPE_DECIMAL32, field.get_type()); + EXPECT_EQ("123.45", field.to_debug_string(2)); + } + { + auto type = std::make_shared(18, 4); + std::vector values = {-1}; + auto field = read_field(type, make_fixed_view(DecodedValueKind::INT64, values)); + EXPECT_EQ(TYPE_DECIMAL64, field.get_type()); + EXPECT_EQ("-0.0001", field.to_debug_string(4)); + } + { + auto type = std::make_shared(38, 2); + std::vector storage = {std::string("\x30\x39", 2)}; + auto refs = string_refs(storage); + auto field = read_field(type, make_binary_view(DecodedValueKind::BINARY, refs)); + EXPECT_EQ(TYPE_DECIMAL128I, field.get_type()); + EXPECT_EQ("123.45", field.to_debug_string(2)); + } + { + auto type = std::make_shared(76, 2); + std::vector storage = {std::string(31, '\xff') + std::string("\xbd", 1)}; + auto refs = string_refs(storage); + auto field = read_field(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 32)); + EXPECT_EQ(TYPE_DECIMAL256, field.get_type()); + EXPECT_EQ("-0.67", field.to_debug_string(2)); + } +} + +TEST(DataTypeSerDeDecodedValuesTest, ReadFieldPropagatesUnsupportedKind) { + { + auto type = std::make_shared(); + std::vector values = {1}; + expect_not_supported( + read_field_status(type, make_fixed_view(DecodedValueKind::INT32, values))); + } + { + auto type = std::make_shared(); + std::vector values = {1.0}; + expect_not_supported( + read_field_status(type, make_fixed_view(DecodedValueKind::DOUBLE, values))); + } + { + auto type = std::make_shared(); + std::vector values = {0}; + expect_not_supported( + read_field_status(type, make_fixed_view(DecodedValueKind::INT64, values))); + } +} + +TEST(DataTypeSerDeDecodedValuesDeathTest, ReadFieldRejectsInvalidRowCountDeathTest) { + auto type = std::make_shared(); + std::vector values = {1, 2}; + Field field; + + auto zero_row_view = make_fixed_view(DecodedValueKind::INT32, values); + zero_row_view.row_count = 0; + EXPECT_DEATH( + { + auto status = type->get_serde()->read_field_from_decoded_value(*type, &field, + zero_row_view); + (void)status; + }, + "view.row_count == 1"); + + auto two_row_view = make_fixed_view(DecodedValueKind::INT32, values); + two_row_view.row_count = 2; + EXPECT_DEATH( + { + auto status = type->get_serde()->read_field_from_decoded_value(*type, &field, + two_row_view); + (void)status; + }, + "view.row_count == 1"); +} + +TEST(DataTypeSerDeDecodedValuesDeathTest, ReadFieldRejectsNullFieldPointerDeathTest) { + auto type = std::make_shared(); + std::vector values = {1}; + auto view = make_fixed_view(DecodedValueKind::INT32, values); + + EXPECT_DEATH( + { + auto status = + type->get_serde()->read_field_from_decoded_value(*type, nullptr, view); + (void)status; + }, + "field != nullptr"); +} + +// ---------------------------------------------------------------------- +// Illegal kind matrix +// ---------------------------------------------------------------------- +// This compact matrix complements the focused error tests above by ensuring each decoded-aware +// family rejects representative illegal physical kinds without mutating an empty destination. + +TEST(DataTypeSerDeDecodedValuesTest, IllegalKindMatrixRejectsUnsupportedCombinations) { + struct Case { + DataTypePtr type; + std::vector illegal_kinds; + }; + std::vector cases = { + {std::make_shared(), {DecodedValueKind::INT32, DecodedValueKind::BINARY}}, + {std::make_shared(), + {DecodedValueKind::BOOL, DecodedValueKind::FLOAT, DecodedValueKind::DOUBLE, + DecodedValueKind::BINARY}}, + {std::make_shared(), + {DecodedValueKind::DOUBLE, DecodedValueKind::INT32}}, + {std::make_shared(), + {DecodedValueKind::FLOAT, DecodedValueKind::INT64}}, + {std::make_shared(), + {DecodedValueKind::INT32, DecodedValueKind::DOUBLE}}, + {std::make_shared(), + {DecodedValueKind::INT64, DecodedValueKind::BINARY}}, + {std::make_shared(6), + {DecodedValueKind::INT32, DecodedValueKind::DOUBLE, DecodedValueKind::BINARY}}, + {std::make_shared(6), + {DecodedValueKind::BOOL, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}}, + {std::make_shared(18, 2), + {DecodedValueKind::BOOL, DecodedValueKind::UINT64, DecodedValueKind::FLOAT, + DecodedValueKind::DOUBLE}}, + }; + + for (const auto& test_case : cases) { + for (auto kind : test_case.illegal_kinds) { + std::vector values = {0}; + auto result = read_column(test_case.type, make_fixed_view(kind, values)); + expect_not_supported(result.status); + EXPECT_EQ(0, result.column->size()) << test_case.type->get_name(); + } + } +} + +} // namespace doris diff --git a/be/test/core/data_type_serde/data_type_serde_pb_test.cpp b/be/test/core/data_type_serde/data_type_serde_pb_test.cpp index 986583982eb2bd..c1663bf7a9dd49 100644 --- a/be/test/core/data_type_serde/data_type_serde_pb_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_pb_test.cpp @@ -54,6 +54,7 @@ #include "core/data_type/data_type_quantilestate.h" #include "core/data_type/data_type_string.h" #include "core/data_type/data_type_struct.h" +#include "core/data_type/data_type_timestamptz.h" #include "core/data_type_serde/data_type_serde.h" #include "core/types.h" #include "core/value/bitmap_value.h" @@ -646,6 +647,17 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestDateTime) { } } +TEST(DataTypeSerDePbTest, DataTypeTimeStampTzToProtobufKeepsScale) { + DataTypePtr data_type(std::make_shared(6)); + PTypeDesc type_desc; + data_type->to_protobuf(&type_desc); + + ASSERT_EQ(type_desc.types_size(), 1); + const auto& scalar_type = type_desc.types(0).scalar_type(); + EXPECT_EQ(scalar_type.type(), TPrimitiveType::TIMESTAMPTZ); + EXPECT_EQ(scalar_type.scale(), 6); +} + TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestLargeInt) { std::cout << "==== LargeInt === " << std::endl; // LargeInt @@ -662,4 +674,4 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestLargeInt) { check_pb_col(data_type, *vec.get()); } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp b/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp index 403ef8713e4e67..b3e512734c6e73 100644 --- a/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp +++ b/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp @@ -18,10 +18,13 @@ #include #include +#include "core/data_type/data_type_number.h" #include "exec/runtime_filter/runtime_filter_selectivity.h" #include "exec/runtime_filter/runtime_filter_test_utils.h" #include "exprs/runtime_filter_expr.h" +#include "exprs/vdirect_in_predicate.h" #include "exprs/vexpr_context.h" +#include "exprs/vslot_ref.h" namespace doris { @@ -178,4 +181,47 @@ TEST_F(RuntimeFilterExprSamplingTest, sampling_frequency_survives_context_recrea EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); } +// RuntimeFilterExpr exposes _impl->children(), but the wrapper itself does not own those +// children in its own _children vector. Deep clone must therefore clone _impl explicitly. +TEST_F(RuntimeFilterExprSamplingTest, deep_clone_clones_impl_tree) { + auto bool_type = TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::BOOLEAN) + .build()) + .build(); + TExprNode node = TExprNodeBuilder(TExprNodeType::IN_PRED, bool_type, 0).build(); + node.in_predicate.__set_is_not_in(false); + node.__set_opcode(TExprOpcode::FILTER_IN); + node.__set_is_nullable(false); + + auto slot = VSlotRef::create_shared(/*slot_id=*/0, /*column_id=*/0, /*column_uniq_id=*/10, + std::make_shared(), "c0"); + auto impl = VDirectInPredicate::create_shared(node, nullptr); + impl->add_child(slot); + + auto wrapper = RuntimeFilterExpr::create_shared(node, impl, 0.4, false, /*filter_id=*/7, + /*sampling_frequency=*/32); + + VExprSPtr cloned_expr; + ASSERT_TRUE(wrapper->deep_clone(&cloned_expr).ok()); + + auto* cloned_wrapper = dynamic_cast(cloned_expr.get()); + ASSERT_NE(cloned_wrapper, nullptr); + EXPECT_NE(cloned_wrapper, wrapper.get()); + EXPECT_EQ(cloned_wrapper->filter_id(), 7); + + auto cloned_impl = cloned_wrapper->get_impl(); + ASSERT_NE(cloned_impl, nullptr); + EXPECT_NE(cloned_impl.get(), impl.get()); + ASSERT_EQ(cloned_impl->get_num_children(), 1); + EXPECT_NE(cloned_impl->children()[0].get(), slot.get()); + + auto* cloned_slot = dynamic_cast(cloned_impl->children()[0].get()); + ASSERT_NE(cloned_slot, nullptr); + EXPECT_EQ(cloned_slot->column_id(), 0); + EXPECT_EQ(cloned_slot->column_uniq_id(), 10); + EXPECT_EQ(cloned_slot->column_name(), "c0"); +} + } // namespace doris diff --git a/be/test/exec/scan/access_path_parser_test.cpp b/be/test/exec/scan/access_path_parser_test.cpp new file mode 100644 index 00000000000000..d4bd6ab6c06360 --- /dev/null +++ b/be/test/exec/scan/access_path_parser_test.cpp @@ -0,0 +1,371 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/scan/access_path_parser.h" + +#include +#include + +#include +#include +#include +#include + +#include "common/consts.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "core/field.h" + +namespace doris { +namespace { + +TColumnAccessPath data_access_path(std::vector path) { + TColumnAccessPath access_path; + access_path.__set_type(TAccessPathType::DATA); + TDataAccessPath data_path; + data_path.__set_path(std::move(path)); + access_path.__set_data_access_path(std::move(data_path)); + return access_path; +} + +TColumnAccessPath data_access_path_without_payload() { + TColumnAccessPath access_path; + access_path.__set_type(TAccessPathType::DATA); + return access_path; +} + +TColumnAccessPath meta_access_path() { + TColumnAccessPath access_path; + access_path.__set_type(TAccessPathType::META); + return access_path; +} + +format::ColumnDefinition field(int32_t id, std::string name, DataTypePtr type, + std::vector children = {}, + std::vector aliases = {}) { + return { + .identifier = Field::create_field(id), + .name = std::move(name), + .name_mapping = std::move(aliases), + .type = std::move(type), + .children = std::move(children), + }; +} + +format::ColumnDefinition root_column(int32_t id, std::string name, DataTypePtr type) { + return { + .identifier = Field::create_field(id), + .name = std::move(name), + .type = std::move(type), + }; +} + +void expect_child(const format::ColumnDefinition& child, int32_t id, const std::string& name) { + ASSERT_TRUE(child.has_identifier_field_id()); + EXPECT_EQ(child.get_identifier_field_id(), id); + EXPECT_EQ(child.name, name); +} + +const format::ColumnDefinition* find_child_by_name(const format::ColumnDefinition& parent, + const std::string& name) { + for (const auto& child : parent.children) { + if (child.name == name) { + return &child; + } + } + return nullptr; +} + +} // namespace + +// Scenario: primitive columns and scanner-materialized virtual columns should not build nested +// children, even when their descriptor carries access paths that are not meaningful to the parser. +TEST(AccessPathParserTest, IgnoresPrimitiveColumnsAndScannerVirtualColumns) { + auto int_type = std::make_shared(); + auto string_type = std::make_shared(); + + // Primitive columns have no nested children, so parser should not inspect even invalid paths. + auto primitive = root_column(1, "id", int_type); + auto status = AccessPathParser::build_nested_children( + &primitive, std::vector {meta_access_path()}, nullptr); + ASSERT_TRUE(status.ok()) << status; + EXPECT_TRUE(primitive.children.empty()); + + // Iceberg rowid is materialized by scanner/table-reader logic and may carry a negative access + // path. Parser must leave it untouched. + auto rowid_type = std::make_shared( + DataTypes {string_type, std::make_shared(), + std::make_shared(), string_type}, + Strings {"file_path", "row_pos", "partition_spec_id", "partition_data_json"}); + format::ColumnDefinition rowid { + .identifier = Field::create_field(BeConsts::ICEBERG_ROWID_COL), + .name = BeConsts::ICEBERG_ROWID_COL, + .type = rowid_type, + }; + status = AccessPathParser::build_nested_children( + &rowid, std::vector {data_access_path({"-1"})}, nullptr); + ASSERT_TRUE(status.ok()) << status; + EXPECT_TRUE(rowid.children.empty()); +} + +// Scenario: reject unsupported top-level inputs before recursive type parsing, including META +// paths, missing DATA payloads, and access paths whose root does not match the projected slot. +TEST(AccessPathParserTest, RejectsUnsupportedTopLevelAccessPathInputs) { + auto int_type = std::make_shared(); + auto struct_type = std::make_shared(DataTypes {int_type}, Strings {"a"}); + + struct Case { + std::string name; + format::ColumnDefinition column; + std::vector paths; + }; + std::vector cases; + cases.push_back({"meta path", root_column(100, "s", struct_type), {meta_access_path()}}); + cases.push_back({"missing DATA payload", + root_column(100, "s", struct_type), + {data_access_path_without_payload()}}); + cases.push_back({"wrong root name", + root_column(100, "s", struct_type), + {data_access_path({"other", "a"})}}); + cases.push_back({"wrong root field id", + root_column(100, "s", struct_type), + {data_access_path({"101", "a"})}}); + + for (auto& test_case : cases) { + auto status = AccessPathParser::build_nested_children(&test_case.column, test_case.paths, + nullptr); + EXPECT_FALSE(status.ok()) << test_case.name; + } +} + +// Scenario: struct access paths support field-id lookup, alias lookup, case-insensitive name +// fallback, and whole-struct expansion; reserved array/map path tokens remain invalid. +TEST(AccessPathParserTest, StructAccessPathMatrix) { + auto int_type = std::make_shared(); + auto struct_type = + std::make_shared(DataTypes {int_type, int_type}, Strings {"a", "b"}); + format::ColumnDefinition schema { + .identifier = Field::create_field(100), + .name = "s", + .type = struct_type, + .children = + { + field(101, "a", int_type), + field(205, "b", int_type, {}, {"old_b"}), + }, + }; + + { + auto column = root_column(100, "s", struct_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"s", "A"})}, nullptr); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 1); + expect_child(column.children[0], 0, "a"); + } + { + auto column = root_column(100, "s", struct_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"100", "205"})}, + &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 1); + expect_child(column.children[0], 205, "b"); + } + { + auto column = root_column(100, "s", struct_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"s", "old_b"})}, + &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 1); + expect_child(column.children[0], 205, "b"); + EXPECT_EQ(column.children[0].name_mapping, std::vector({"old_b"})); + } + { + auto column = root_column(100, "s", struct_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"s"})}, &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 2); + expect_child(column.children[0], 101, "a"); + expect_child(column.children[1], 205, "b"); + } + + for (const auto& invalid_child : {"OFFSET", "*", "KEYS", "VALUES", "missing"}) { + auto column = root_column(100, "s", struct_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"s", invalid_child})}, + &schema); + EXPECT_FALSE(status.ok()) << invalid_child; + } +} + +// Scenario: array access paths must pass through the "*" element token, then reuse struct child +// parsing under the element wrapper; invalid array tokens are rejected. +TEST(AccessPathParserTest, ArrayAccessPathMatrix) { + auto int_type = std::make_shared(); + auto string_type = std::make_shared(); + auto element_type = std::make_shared(DataTypes {string_type, int_type}, + Strings {"item", "quantity"}); + auto array_type = std::make_shared(element_type); + format::ColumnDefinition schema { + .identifier = Field::create_field(200), + .name = "items", + .type = array_type, + .children = + { + field(201, "element", element_type, + { + field(202, "item", string_type, {}, {"old_item"}), + field(203, "quantity", int_type), + }), + }, + }; + + { + auto column = root_column(200, "items", array_type); + auto status = AccessPathParser::build_nested_children( + &column, + std::vector {data_access_path({"items", "*", "old_item"})}, + &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 1); + expect_child(column.children[0], 201, "element"); + ASSERT_EQ(column.children[0].children.size(), 1); + expect_child(column.children[0].children[0], 202, "item"); + EXPECT_EQ(column.children[0].children[0].name_mapping, + std::vector({"old_item"})); + } + { + auto column = root_column(200, "items", array_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"items"})}, &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 1); + expect_child(column.children[0], 201, "element"); + ASSERT_EQ(column.children[0].children.size(), 2); + expect_child(column.children[0].children[0], 202, "item"); + expect_child(column.children[0].children[1], 203, "quantity"); + } + + for (const auto& invalid_path : std::vector> { + {"items", "OFFSET"}, {"items", "item"}, {"items", "*", "missing"}}) { + auto column = root_column(200, "items", array_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path(invalid_path)}, &schema); + EXPECT_FALSE(status.ok()) << invalid_path.back(); + } +} + +// Scenario: map access paths split KEYS/VALUES, force the missing side needed for materialization, +// merge repeated value-child requests, and reject unsupported map child tokens. +TEST(AccessPathParserTest, MapAccessPathMatrix) { + auto int_type = std::make_shared(); + auto string_type = std::make_shared(); + auto value_type = std::make_shared( + DataTypes {string_type, int_type, string_type}, Strings {"full_name", "age", "gender"}); + auto map_type = std::make_shared(string_type, value_type); + format::ColumnDefinition schema { + .identifier = Field::create_field(300), + .name = "m", + .type = map_type, + .children = + { + field(301, "key", string_type), + field(302, "value", value_type, + { + field(303, "full_name", string_type, {}, {"name"}), + field(304, "age", int_type), + field(305, "gender", string_type), + }), + }, + }; + + { + auto column = root_column(300, "m", map_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"m", "KEYS"})}, &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 2); + expect_child(column.children[0], 301, "key"); + expect_child(column.children[1], 302, "value"); + ASSERT_EQ(column.children[1].children.size(), 3); + const auto* full_name = find_child_by_name(column.children[1], "full_name"); + ASSERT_NE(full_name, nullptr); + expect_child(*full_name, 303, "full_name"); + const auto* age = find_child_by_name(column.children[1], "age"); + ASSERT_NE(age, nullptr); + expect_child(*age, 304, "age"); + const auto* gender = find_child_by_name(column.children[1], "gender"); + ASSERT_NE(gender, nullptr); + expect_child(*gender, 305, "gender"); + } + { + auto column = root_column(300, "m", map_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"m", "VALUES", "age"})}, + &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 2); + expect_child(column.children[0], 301, "key"); + expect_child(column.children[1], 302, "value"); + ASSERT_EQ(column.children[1].children.size(), 1); + expect_child(column.children[1].children[0], 304, "age"); + } + { + auto column = root_column(300, "m", map_type); + auto status = AccessPathParser::build_nested_children( + &column, + std::vector { + data_access_path({"m", "VALUES", "name"}), + data_access_path({"m", "*", "gender"}), + }, + &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 2); + ASSERT_EQ(column.children[1].children.size(), 2); + const auto* full_name = find_child_by_name(column.children[1], "full_name"); + ASSERT_NE(full_name, nullptr); + expect_child(*full_name, 303, "full_name"); + EXPECT_EQ(full_name->name_mapping, std::vector({"name"})); + const auto* gender = find_child_by_name(column.children[1], "gender"); + ASSERT_NE(gender, nullptr); + expect_child(*gender, 305, "gender"); + } + { + auto column = root_column(300, "m", map_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path({"m"})}, &schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(column.children.size(), 2); + ASSERT_EQ(column.children[1].children.size(), 3); + } + + for (const auto& invalid_path : std::vector> { + {"m", "OFFSET"}, {"m", "ENTRY"}, {"m", "VALUES", "missing"}}) { + auto column = root_column(300, "m", map_type); + auto status = AccessPathParser::build_nested_children( + &column, std::vector {data_access_path(invalid_path)}, &schema); + EXPECT_FALSE(status.ok()) << invalid_path.back(); + } +} + +} // namespace doris diff --git a/be/test/exec/scan/file_scanner_v2_test.cpp b/be/test/exec/scan/file_scanner_v2_test.cpp new file mode 100644 index 00000000000000..436a18c66decf4 --- /dev/null +++ b/be/test/exec/scan/file_scanner_v2_test.cpp @@ -0,0 +1,347 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/scan/file_scanner_v2.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "exec/scan/split_source_connector.h" +#include "exprs/runtime_filter_expr.h" +#include "exprs/vdirect_in_predicate.h" +#include "exprs/vslot_ref.h" +#include "format_v2/expr/cast.h" + +namespace doris { +namespace { + +TFileRangeDesc range_with_format(std::string table_format, TFileFormatType::type format_type) { + TFileRangeDesc range; + range.__set_format_type(format_type); + if (!table_format.empty()) { + TTableFormatFileDesc table_desc; + table_desc.__set_table_format_type(std::move(table_format)); + range.__set_table_format_params(std::move(table_desc)); + } + return range; +} + +TFileRangeDesc hudi_range_with_delta_logs() { + auto range = range_with_format("hudi", TFileFormatType::FORMAT_PARQUET); + THudiFileDesc hudi_params; + hudi_params.__set_delta_logs({"delta.log"}); + range.table_format_params.__set_hudi_params(std::move(hudi_params)); + return range; +} + +TScanRangeParams scan_range_param(const TFileRangeDesc& range) { + TScanRangeParams params; + params.scan_range.ext_scan_range.file_scan_range.ranges.push_back(range); + return params; +} + +VExprSPtr slot_ref(int slot_id, int column_id, DataTypePtr type, const std::string& name) { + return VSlotRef::create_shared(slot_id, column_id, -1, std::move(type), name); +} + +TExprNode bool_in_pred_node() { + TTypeDesc bool_type; + TTypeNode bool_node; + TScalarType bool_scalar_type; + bool_scalar_type.__set_type(TPrimitiveType::BOOLEAN); + bool_node.__set_type(TTypeNodeType::SCALAR); + bool_node.__set_scalar_type(bool_scalar_type); + bool_type.types.push_back(bool_node); + + TExprNode node; + node.__set_type(bool_type); + node.__set_node_type(TExprNodeType::IN_PRED); + node.in_predicate.__set_is_not_in(false); + node.__set_opcode(TExprOpcode::FILTER_IN); + node.__set_is_nullable(false); + return node; +} + +} // namespace + +// Scenario: FileScannerV2::is_supported should honor table format, scan params format, and the +// optional per-range file format override as a single matrix. +TEST(FileScannerV2Test, SupportedFormatMatrix) { + struct Case { + std::string table_format; + TFileFormatType::type params_format; + std::optional range_format; + bool expected; + }; + + const std::vector cases { + {"", TFileFormatType::FORMAT_PARQUET, std::nullopt, true}, + {"tvf", TFileFormatType::FORMAT_PARQUET, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_PARQUET, std::nullopt, true}, + {"iceberg", TFileFormatType::FORMAT_PARQUET, std::nullopt, true}, + {"paimon", TFileFormatType::FORMAT_PARQUET, std::nullopt, true}, + {"hudi", TFileFormatType::FORMAT_PARQUET, std::nullopt, true}, + {"jdbc", TFileFormatType::FORMAT_PARQUET, std::nullopt, false}, + {"", TFileFormatType::FORMAT_JNI, std::nullopt, false}, + {"hive", TFileFormatType::FORMAT_ORC, std::nullopt, false}, + {"jdbc", TFileFormatType::FORMAT_JNI, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_JNI, std::nullopt, false}, + {"", TFileFormatType::FORMAT_CSV_PLAIN, std::nullopt, true}, + {"tvf", TFileFormatType::FORMAT_CSV_GZ, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_CSV_BZ2, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_CSV_LZ4FRAME, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_CSV_LZ4BLOCK, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_CSV_LZOP, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_CSV_DEFLATE, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_CSV_SNAPPYBLOCK, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_PROTO, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_TEXT, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_JSON, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_ORC, false}, + {"hive", TFileFormatType::FORMAT_ORC, TFileFormatType::FORMAT_PARQUET, true}, + {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_CSV_PLAIN, true}, + {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_TEXT, true}, + {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_JSON, true}, + {"tvf", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_NATIVE, true}, + {"remote_doris", TFileFormatType::FORMAT_ARROW, std::nullopt, true}, + {"hive", TFileFormatType::FORMAT_ARROW, std::nullopt, false}, + {"", TFileFormatType::FORMAT_ARROW, std::nullopt, false}, + {"", TFileFormatType::FORMAT_WAL, std::nullopt, false}, + }; + + for (const auto& test_case : cases) { + TFileScanRangeParams params; + params.__set_format_type(test_case.params_format); + auto range = range_with_format(test_case.table_format, + test_case.range_format.value_or(test_case.params_format)); + if (!test_case.range_format.has_value()) { + range.__isset.format_type = false; + } + EXPECT_EQ(FileScannerV2::is_supported(params, range), test_case.expected) + << "table_format=" << test_case.table_format + << ", params_format=" << static_cast(test_case.params_format) + << ", range_has_format=" << test_case.range_format.has_value(); + } + + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + EXPECT_FALSE(FileScannerV2::is_supported(params, hudi_range_with_delta_logs())); +} + +// Scenario: SplitSourceConnector should route to FileScannerV2 only when every scan range in the +// source is supported; one unsupported table format or file format must make the match fail. +TEST(FileScannerV2Test, SplitSourceAllScanRangesMatchRequiresEveryRangeSupported) { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + + const auto supported = range_with_format("hive", TFileFormatType::FORMAT_PARQUET); + const auto unsupported_table = range_with_format("lakesoul", TFileFormatType::FORMAT_PARQUET); + const auto unsupported_format = range_with_format("hive", TFileFormatType::FORMAT_ORC); + + LocalSplitSourceConnector all_supported( + {scan_range_param(supported), + scan_range_param(range_with_format("iceberg", TFileFormatType::FORMAT_PARQUET))}, + 1); + EXPECT_TRUE(all_supported.all_scan_ranges_match(params, FileScannerV2::is_supported)); + + LocalSplitSourceConnector hudi_supported( + {scan_range_param(supported), + scan_range_param(range_with_format("hudi", TFileFormatType::FORMAT_PARQUET))}, + 1); + EXPECT_TRUE(hudi_supported.all_scan_ranges_match(params, FileScannerV2::is_supported)); + + LocalSplitSourceConnector table_mismatch( + {scan_range_param(supported), scan_range_param(unsupported_table)}, 1); + EXPECT_FALSE(table_mismatch.all_scan_ranges_match(params, FileScannerV2::is_supported)); + + LocalSplitSourceConnector format_mismatch( + {scan_range_param(supported), scan_range_param(unsupported_format)}, 1); + EXPECT_FALSE(format_mismatch.all_scan_ranges_match(params, FileScannerV2::is_supported)); +} + +// Scenario: FileScannerV2 converts only the file formats implemented by format_v2 readers and +// rejects everything else before TableReader::init sees an unsupported FileFormat. +TEST(FileScannerV2Test, FileFormatConversionMatrix) { + struct Case { + TFileFormatType::type input; + std::optional expected; + }; + const std::vector cases { + {TFileFormatType::FORMAT_PARQUET, format::FileFormat::PARQUET}, + {TFileFormatType::FORMAT_JNI, format::FileFormat::JNI}, + {TFileFormatType::FORMAT_CSV_PLAIN, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_GZ, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_BZ2, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_LZ4FRAME, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_LZ4BLOCK, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_LZOP, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_DEFLATE, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_CSV_SNAPPYBLOCK, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_PROTO, format::FileFormat::CSV}, + {TFileFormatType::FORMAT_TEXT, format::FileFormat::TEXT}, + {TFileFormatType::FORMAT_JSON, format::FileFormat::JSON}, + {TFileFormatType::FORMAT_NATIVE, format::FileFormat::NATIVE}, + {TFileFormatType::FORMAT_ARROW, format::FileFormat::ARROW}, + {TFileFormatType::FORMAT_ORC, std::nullopt}, + }; + + for (const auto& test_case : cases) { + format::FileFormat file_format = format::FileFormat::PARQUET; + const auto status = FileScannerV2::TEST_to_file_format(test_case.input, &file_format); + if (test_case.expected.has_value()) { + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(file_format, *test_case.expected); + } else { + EXPECT_FALSE(status.ok()); + } + } +} + +// Scenario: partition slots are identified from the explicit FE category when present, otherwise +// from the legacy is_file_slot flag. Scanner-generated rowid columns must never be treated as +// partition columns even if FE marks them as non-file slots. +TEST(FileScannerV2Test, PartitionSlotClassificationMatrix) { + TFileScanSlotInfo legacy_partition; + legacy_partition.__set_is_file_slot(false); + EXPECT_TRUE(FileScannerV2::TEST_is_partition_slot(legacy_partition, "dt")); + + TFileScanSlotInfo legacy_file; + legacy_file.__set_is_file_slot(true); + EXPECT_FALSE(FileScannerV2::TEST_is_partition_slot(legacy_file, "value")); + + TFileScanSlotInfo categorized_partition; + categorized_partition.__set_is_file_slot(true); + categorized_partition.__set_category(TColumnCategory::PARTITION_KEY); + EXPECT_TRUE(FileScannerV2::TEST_is_partition_slot(categorized_partition, "p")); + + TFileScanSlotInfo categorized_regular; + categorized_regular.__set_is_file_slot(false); + categorized_regular.__set_category(TColumnCategory::REGULAR); + EXPECT_FALSE(FileScannerV2::TEST_is_partition_slot(categorized_regular, "regular_col")); + + EXPECT_FALSE( + FileScannerV2::TEST_is_partition_slot(legacy_partition, BeConsts::GLOBAL_ROWID_COL)); + EXPECT_FALSE( + FileScannerV2::TEST_is_partition_slot(legacy_partition, BeConsts::ICEBERG_ROWID_COL)); +} + +// Scenario: data-file slots are the complement of partition/default/synthesized columns for +// formats without embedded schema. FE may send either the new category or the old is_file_slot +// flag, and scanner-generated rowid columns must never be passed to a physical file reader. +TEST(FileScannerV2Test, DataFileSlotClassificationMatrix) { + TFileScanSlotInfo legacy_file; + legacy_file.__set_is_file_slot(true); + EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(legacy_file, "value")); + + TFileScanSlotInfo legacy_partition; + legacy_partition.__set_is_file_slot(false); + EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_partition, "dt")); + + TFileScanSlotInfo categorized_regular; + categorized_regular.__set_is_file_slot(false); + categorized_regular.__set_category(TColumnCategory::REGULAR); + EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(categorized_regular, "regular_col")); + + TFileScanSlotInfo categorized_generated; + categorized_generated.__set_is_file_slot(false); + categorized_generated.__set_category(TColumnCategory::GENERATED); + EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(categorized_generated, "generated_col")); + + TFileScanSlotInfo categorized_partition; + categorized_partition.__set_is_file_slot(true); + categorized_partition.__set_category(TColumnCategory::PARTITION_KEY); + EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(categorized_partition, "p")); + + TFileScanSlotInfo categorized_synthesized; + categorized_synthesized.__set_is_file_slot(true); + categorized_synthesized.__set_category(TColumnCategory::SYNTHESIZED); + EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(categorized_synthesized, "virtual_col")); + + EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_file, BeConsts::GLOBAL_ROWID_COL)); + EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_file, BeConsts::ICEBERG_ROWID_COL)); +} + +// Scenario: table conjuncts are cloned into global-index space before they are handed to +// TableReader. Explicit slot-id mappings use the required_slots order; missing mappings fall back +// to the slot id itself for legacy descriptors. +TEST(FileScannerV2Test, RewriteSlotRefsToGlobalIndexMatrix) { + const auto int_type = std::make_shared(); + { + auto expr = slot_ref(42, 99, int_type, "value"); + const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index( + &expr, {{42, format::GlobalIndex(3)}}); + ASSERT_TRUE(status.ok()) << status; + const auto* rewritten = assert_cast(expr.get()); + EXPECT_EQ(rewritten->slot_id(), 3); + EXPECT_EQ(rewritten->column_id(), 3); + EXPECT_EQ(rewritten->column_name(), "value"); + } + { + auto expr = slot_ref(7, 99, int_type, "legacy_value"); + const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(&expr, {}); + ASSERT_TRUE(status.ok()) << status; + const auto* rewritten = assert_cast(expr.get()); + EXPECT_EQ(rewritten->slot_id(), 7); + EXPECT_EQ(rewritten->column_id(), 7); + EXPECT_EQ(rewritten->column_name(), "legacy_value"); + } + { + auto cast_expr = format::Cast::create_shared(int_type); + cast_expr->add_child(slot_ref(9, 9, int_type, "nested_value")); + VExprSPtr expr = cast_expr; + const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index( + &expr, {{9, format::GlobalIndex(1)}}); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(expr->get_num_children(), 1); + const auto* rewritten_child = assert_cast(expr->children()[0].get()); + EXPECT_EQ(rewritten_child->slot_id(), 1); + EXPECT_EQ(rewritten_child->column_id(), 1); + EXPECT_EQ(rewritten_child->column_name(), "nested_value"); + } + { + const auto node = bool_in_pred_node(); + auto impl = VDirectInPredicate::create_shared(node, nullptr); + impl->add_child(slot_ref(11, 11, int_type, "rf_value")); + VExprSPtr expr = RuntimeFilterExpr::create_shared(node, impl, 0.4, false, 7); + const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index( + &expr, {{11, format::GlobalIndex(2)}}); + ASSERT_TRUE(status.ok()) << status; + + auto* runtime_filter = assert_cast(expr.get()); + auto rewritten_impl = runtime_filter->get_impl(); + ASSERT_NE(rewritten_impl, nullptr); + ASSERT_EQ(rewritten_impl->get_num_children(), 1); + const auto* rewritten_child = + assert_cast(rewritten_impl->children()[0].get()); + EXPECT_EQ(rewritten_child->slot_id(), 2); + EXPECT_EQ(rewritten_child->column_id(), 2); + EXPECT_EQ(rewritten_child->column_name(), "rf_value"); + } +} + +} // namespace doris diff --git a/be/test/exec/scan/vfile_scanner_exception_test.cpp b/be/test/exec/scan/vfile_scanner_exception_test.cpp index 64b17a6a86b87b..70b3d07f8eff48 100644 --- a/be/test/exec/scan/vfile_scanner_exception_test.cpp +++ b/be/test/exec/scan/vfile_scanner_exception_test.cpp @@ -18,13 +18,19 @@ #include #include +#include #include +#include #include #include "common/object_pool.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" #include "cpp/sync_point.h" #include "exec/operator/file_scan_operator.h" #include "exec/scan/file_scanner.h" +#include "exec/scan/split_source_connector.h" +#include "format_v2/table/hive_reader.h" #include "io/fs/local_file_system.h" #include "load/group_commit/wal/wal_manager.h" #include "runtime/cluster_info.h" @@ -34,7 +40,6 @@ #include "runtime/user_function_cache.h" namespace doris { - class TestSplitSourceConnectorStub : public SplitSourceConnector { private: std::mutex _range_lock; @@ -336,4 +341,112 @@ TEST_F(VfileScannerExceptionTest, process_late_arrival_conjuncts_retain) { WARN_IF_ERROR(scanner->close(&_runtime_state), "fail to close scanner"); } +TEST(HiveReaderPositionMappingTest, PositionMappingUsesColumnIdxsForFileSlots) { + TQueryOptions query_options; + query_options.hive_parquet_use_column_names = false; + RuntimeState runtime_state(query_options, TQueryGlobals()); + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + params.__set_column_idxs({2, 0}); + format::ProjectedColumnBuildContext context { + .scan_params = ¶ms, + .runtime_state = &runtime_state, + }; + format::hive::HiveReader reader; + + TFileScanSlotInfo id_slot; + id_slot.__set_is_file_slot(true); + format::ColumnDefinition id_column { + .identifier = Field::create_field("id"), + .name = "id", + .type = std::make_shared(), + }; + + TFileScanSlotInfo name_slot; + name_slot.__set_is_file_slot(true); + format::ColumnDefinition name_column { + .identifier = Field::create_field("name"), + .name = "name", + .type = std::make_shared(), + }; + + ASSERT_TRUE(reader.annotate_projected_column(id_slot, &context, &id_column).ok()); + ASSERT_TRUE(id_column.has_identifier_field_id()); + EXPECT_EQ(id_column.get_identifier_position(), 2); + EXPECT_EQ(context.next_file_column_idx, 1); + + ASSERT_TRUE(reader.annotate_projected_column(name_slot, &context, &name_column).ok()); + ASSERT_TRUE(name_column.has_identifier_field_id()); + EXPECT_EQ(name_column.get_identifier_position(), 0); + EXPECT_EQ(context.next_file_column_idx, 2); + ASSERT_TRUE(reader.validate_projected_columns(context).ok()); +} + +TEST(HiveReaderPositionMappingTest, PositionMappingDoesNotConsumePartitionSlots) { + TQueryOptions query_options; + query_options.hive_parquet_use_column_names = false; + RuntimeState runtime_state(query_options, TQueryGlobals()); + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + params.__set_column_idxs({3}); + format::ProjectedColumnBuildContext context { + .scan_params = ¶ms, + .runtime_state = &runtime_state, + }; + format::hive::HiveReader reader; + + TFileScanSlotInfo partition_slot; + partition_slot.__set_is_file_slot(false); + partition_slot.__set_category(TColumnCategory::PARTITION_KEY); + format::ColumnDefinition partition_column { + .identifier = Field::create_field("year"), + .name = "year", + .type = std::make_shared(), + }; + + TFileScanSlotInfo value_slot; + value_slot.__set_is_file_slot(true); + format::ColumnDefinition value_column { + .identifier = Field::create_field("value"), + .name = "value", + .type = std::make_shared(), + }; + + ASSERT_TRUE(reader.annotate_projected_column(partition_slot, &context, &partition_column).ok()); + ASSERT_TRUE(partition_column.has_identifier_name()); + EXPECT_EQ(partition_column.get_identifier_name(), "year"); + EXPECT_EQ(context.next_file_column_idx, 0); + + ASSERT_TRUE(reader.annotate_projected_column(value_slot, &context, &value_column).ok()); + ASSERT_TRUE(value_column.has_identifier_field_id()); + EXPECT_EQ(value_column.get_identifier_position(), 3); + EXPECT_EQ(context.next_file_column_idx, 1); + ASSERT_TRUE(reader.validate_projected_columns(context).ok()); +} + +TEST(HiveReaderPositionMappingTest, PositionMappingFailsWhenColumnIdxsMissing) { + TQueryOptions query_options; + query_options.hive_parquet_use_column_names = false; + RuntimeState runtime_state(query_options, TQueryGlobals()); + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + format::ProjectedColumnBuildContext context { + .scan_params = ¶ms, + .runtime_state = &runtime_state, + }; + format::hive::HiveReader reader; + + TFileScanSlotInfo value_slot; + value_slot.__set_is_file_slot(true); + format::ColumnDefinition value_column { + .identifier = Field::create_field("value"), + .name = "value", + .type = std::make_shared(), + }; + + auto status = reader.annotate_projected_column(value_slot, &context, &value_column); + EXPECT_FALSE(status.ok()); + EXPECT_EQ(context.next_file_column_idx, 0); +} + } // namespace doris diff --git a/be/test/format_v2/column_mapper_test.cpp b/be/test/format_v2/column_mapper_test.cpp new file mode 100644 index 00000000000000..d870f85dd7dc10 --- /dev/null +++ b/be/test/format_v2/column_mapper_test.cpp @@ -0,0 +1,4140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/column_mapper.h" + +#include + +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_decimal.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "core/data_type/data_type_timestamptz.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "exprs/vin_predicate.h" +#include "exprs/vliteral.h" +#include "exprs/vslot_ref.h" +#include "format_v2/column_mapper_nested.h" +#include "format_v2/expr/cast.h" +#include "format_v2/schema_projection.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/Exprs_types.h" +#include "runtime/descriptors.h" +#include "storage/predicate/predicate_creator.h" +#include "testutil/column_helper.h" +#include "testutil/mock/mock_runtime_state.h" + +namespace doris::format { +namespace { + +DataTypePtr i32() { + return std::make_shared(); +} + +DataTypePtr i64() { + return std::make_shared(); +} + +DataTypePtr f32() { + return std::make_shared(); +} + +DataTypePtr f64() { + return std::make_shared(); +} + +DataTypePtr dec32(uint32_t precision, uint32_t scale) { + return std::make_shared(precision, scale); +} + +DataTypePtr str() { + return std::make_shared(); +} + +DataTypePtr timestamptz(uint32_t scale) { + return std::make_shared(scale); +} + +DataTypePtr u8() { + return std::make_shared(); +} + +ColumnDefinition field_id_col(const std::string& name, int32_t field_id, DataTypePtr type, + int32_t local_id = -1) { + ColumnDefinition column; + column.identifier = Field::create_field(field_id); + column.local_id = local_id; + column.name = name; + column.type = std::move(type); + return column; +} + +ColumnDefinition name_col(const std::string& name, DataTypePtr type, int32_t local_id = -1) { + ColumnDefinition column; + column.identifier = Field::create_field(name); + column.local_id = local_id; + column.name = name; + column.type = std::move(type); + return column; +} + +ColumnDefinition name_id_col(const std::string& name, const std::string& identifier, + DataTypePtr type, int32_t local_id = -1) { + ColumnDefinition column = name_col(name, std::move(type), local_id); + column.identifier = Field::create_field(identifier); + return column; +} + +ColumnDefinition position_col(const std::string& name, int32_t file_position, DataTypePtr type) { + return field_id_col(name, file_position, std::move(type)); +} + +ColumnDefinition struct_col(const std::string& name, int32_t field_id, + std::vector children, int32_t local_id = -1) { + DataTypes child_types; + Strings child_names; + child_types.reserve(children.size()); + child_names.reserve(children.size()); + for (const auto& child : children) { + child_types.push_back(child.type); + child_names.push_back(child.name); + } + auto column = field_id_col( + name, field_id, std::make_shared(child_types, child_names), local_id); + column.children = std::move(children); + return column; +} + +ColumnDefinition struct_name_col(const std::string& name, std::vector children, + int32_t local_id = -1) { + auto column = struct_col(name, -1, std::move(children), local_id); + column.identifier = Field::create_field(name); + return column; +} + +ColumnDefinition array_col(const std::string& name, int32_t field_id, ColumnDefinition element, + int32_t local_id = -1) { + auto column = + field_id_col(name, field_id, std::make_shared(element.type), local_id); + column.children = {std::move(element)}; + return column; +} + +ColumnDefinition map_col(const std::string& name, int32_t field_id, + std::vector children, const DataTypePtr& key_type, + const DataTypePtr& value_type, int32_t local_id = -1) { + auto column = field_id_col(name, field_id, std::make_shared(key_type, value_type), + local_id); + column.children = std::move(children); + return column; +} + +void set_name_identifiers(ColumnDefinition* column, int32_t local_id) { + DORIS_CHECK(column != nullptr); + column->identifier = Field::create_field(column->name); + column->local_id = local_id; + for (size_t idx = 0; idx < column->children.size(); ++idx) { + set_name_identifiers(&column->children[idx], static_cast(idx)); + } +} + +std::vector projection_ids(const std::vector& projections) { + std::vector ids; + ids.reserve(projections.size()); + for (const auto& projection : projections) { + ids.push_back(projection.local_id()); + } + return ids; +} + +std::vector target_names(const FileStructPredicateTarget* target) { + std::vector names; + for (const auto* current = target; current != nullptr; current = current->child.get()) { + names.push_back(current->file_child_name); + } + return names; +} + +TEST(ColumnMapperDebugTest, CoversDebugStringEnumAndNestedBranches) { + ColumnDefinition child = field_id_col("child", 2, str(), 3); + child.name_mapping = {"legacy_child"}; + + ColumnDefinition column = field_id_col( + "root", 1, + std::make_shared(DataTypes {child.type}, Strings {child.name})); + column.name_mapping = {"legacy_root"}; + column.children = {child}; + column.default_expr = VExprContext::create_shared(VLiteral::create_shared( + std::make_shared(), Field::create_field("fallback"))); + column.is_partition_key = true; + + const auto column_debug = column.debug_string(); + EXPECT_NE(column_debug.find("ColumnDefinition{name=root"), std::string::npos); + EXPECT_NE(column_debug.find("name_mapping=[legacy_root]"), std::string::npos); + EXPECT_NE(column_debug.find("children=[ColumnDefinition{name=child"), std::string::npos); + EXPECT_NE(column_debug.find("has_default_expr=1"), std::string::npos); + EXPECT_NE(column_debug.find("is_partition_key=1"), std::string::npos); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(4); + projection.children.push_back(LocalColumnIndex::local(7)); + EXPECT_NE(projection.debug_string().find("children=[LocalColumnIndex{index=7"), + std::string::npos); + + const std::vector modes {TableColumnMappingMode::BY_FIELD_ID, + TableColumnMappingMode::BY_NAME, + TableColumnMappingMode::BY_INDEX}; + const std::vector mode_names {"BY_FIELD_ID", "BY_NAME", "BY_INDEX"}; + for (size_t idx = 0; idx < modes.size(); ++idx) { + TableColumnMapperOptions options {.mode = modes[idx]}; + EXPECT_NE(options.debug_string().find(mode_names[idx]), std::string::npos); + } + + const std::vector conversions { + FilterConversionType::COPY_DIRECTLY, FilterConversionType::CAST_FILTER, + FilterConversionType::READER_EXPRESSION, FilterConversionType::FINALIZE_ONLY, + FilterConversionType::CONSTANT}; + const std::vector conversion_names { + "COPY_DIRECTLY", "CAST_FILTER", "READER_EXPRESSION", "FINALIZE_ONLY", "CONSTANT"}; + for (size_t idx = 0; idx < conversions.size(); ++idx) { + ColumnMapping mapping; + mapping.global_index = GlobalIndex(idx); + mapping.table_column_name = "table_col"; + mapping.file_local_id = 8; + mapping.constant_index = ConstantIndex(9); + mapping.file_column_name = "file_col"; + mapping.original_file_type = str(); + mapping.original_file_children = {child}; + mapping.file_type = str(); + mapping.table_type = str(); + mapping.is_trivial = idx % 2 == 0; + mapping.filter_conversion = conversions[idx]; + mapping.virtual_column_type = static_cast( + idx % (TableVirtualColumnType::ICEBERG_ROWID + 1)); + mapping.default_expr = column.default_expr; + + ColumnMapping child_mapping; + child_mapping.global_index = GlobalIndex(10 + idx); + child_mapping.table_column_name = "child_col"; + child_mapping.file_column_name = "child_file"; + child_mapping.file_type = i32(); + child_mapping.table_type = i32(); + mapping.child_mappings.push_back(std::move(child_mapping)); + + const auto debug = mapping.debug_string(); + EXPECT_NE(debug.find("file_local_id=8"), std::string::npos); + EXPECT_NE(debug.find("constant_index=9"), std::string::npos); + EXPECT_NE(debug.find(conversion_names[idx]), std::string::npos); + EXPECT_NE(debug.find("child_mappings=[ColumnMapping{global_index="), std::string::npos); + EXPECT_NE(debug.find("has_default_expr=1"), std::string::npos); + } +} + +void expect_mapping(const ColumnMapping& mapping, size_t global_index, + const std::string& table_name, int32_t file_local_id, + const std::string& file_name, const DataTypePtr& file_type, + const DataTypePtr& table_type) { + EXPECT_EQ(mapping.global_index, GlobalIndex(global_index)); + EXPECT_EQ(mapping.table_column_name, table_name); + ASSERT_TRUE(mapping.file_local_id.has_value()); + EXPECT_EQ(*mapping.file_local_id, file_local_id); + EXPECT_EQ(mapping.file_column_name, file_name); + ASSERT_NE(mapping.file_type, nullptr); + ASSERT_NE(mapping.table_type, nullptr); + EXPECT_TRUE(mapping.file_type->equals(*file_type)); + EXPECT_TRUE(mapping.table_type->equals(*table_type)); +} + +void expect_constant(const TableColumnMapper& mapper, const ColumnMapping& mapping, + size_t global_index, const DataTypePtr& table_type) { + EXPECT_FALSE(mapping.file_local_id.has_value()); + ASSERT_TRUE(mapping.constant_index.has_value()); + ASSERT_LT(mapping.constant_index->value(), mapper.constant_map().size()); + const auto& entry = mapper.constant_map().get(*mapping.constant_index); + EXPECT_EQ(entry.global_index, GlobalIndex(global_index)); + EXPECT_TRUE(entry.type->equals(*table_type)); + EXPECT_EQ(entry.expr, mapping.default_expr); +} + +void expect_missing(const ColumnMapping& mapping) { + EXPECT_FALSE(mapping.file_local_id.has_value()); + EXPECT_FALSE(mapping.constant_index.has_value()); + EXPECT_EQ(mapping.virtual_column_type, TableVirtualColumnType::INVALID); +} + +class TestFunctionExpr final : public VExpr { +public: + TestFunctionExpr(std::string function_name, DataTypePtr data_type, + TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL, + TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) + : VExpr(std::move(data_type), false), _expr_name(std::move(function_name)) { + set_node_type(node_type); + _opcode = opcode; + TFunctionName fn_name; + fn_name.__set_function_name(_expr_name); + _fn.__set_name(fn_name); + } + + const std::string& expr_name() const override { return _expr_name; } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = + std::make_shared(_expr_name, data_type(), node_type(), _opcode); + return Status::OK(); + } + + Status execute_column_impl(VExprContext*, const Block*, const Selector*, size_t, + ColumnPtr&) const override { + return Status::NotSupported("TestFunctionExpr is only used for ColumnMapper analysis"); + } + +private: + std::string _expr_name; +}; + +VExprSPtr table_slot(int slot_id, int column_id, DataTypePtr type, const std::string& name) { + return VSlotRef::create_shared(slot_id, column_id, -1, std::move(type), name); +} + +VExprSPtr literal(DataTypePtr type, Field value) { + return VLiteral::create_shared(std::move(type), std::move(value)); +} + +VExprSPtr struct_element(const VExprSPtr& parent, DataTypePtr child_type, + const std::string& child_name) { + auto expr = std::make_shared("struct_element", child_type); + expr->add_child(parent); + expr->add_child(literal(str(), Field::create_field(child_name))); + return expr; +} + +VExprSPtr element_at(const VExprSPtr& parent, DataTypePtr child_type, + const std::string& child_name) { + auto expr = std::make_shared("element_at", std::move(child_type)); + expr->add_child(parent); + expr->add_child(literal(str(), Field::create_field(child_name))); + return expr; +} + +VExprSPtr array_element_at(const VExprSPtr& parent, DataTypePtr child_type, int64_t ordinal) { + auto expr = std::make_shared("element_at", std::move(child_type)); + expr->add_child(parent); + expr->add_child(literal(i64(), Field::create_field(ordinal))); + return expr; +} + +VExprSPtr map_values(const VExprSPtr& parent, DataTypePtr value_type) { + auto expr = std::make_shared( + "map_values", std::make_shared(std::move(value_type))); + expr->add_child(parent); + return expr; +} + +VExprSPtr map_keys(const VExprSPtr& parent, DataTypePtr key_type) { + auto expr = std::make_shared( + "map_keys", std::make_shared(std::move(key_type))); + expr->add_child(parent); + return expr; +} + +VExprSPtr array_contains(const VExprSPtr& array, const VExprSPtr& value) { + auto expr = std::make_shared("array_contains", u8()); + expr->add_child(array); + expr->add_child(value); + return expr; +} + +VExprSPtr like_expr(const VExprSPtr& left, const std::string& pattern) { + auto expr = std::make_shared("like", u8()); + expr->add_child(left); + expr->add_child(literal(str(), Field::create_field(pattern))); + return expr; +} + +VExprSPtr struct_element_by_selector(const VExprSPtr& parent, DataTypePtr child_type, + const VExprSPtr& selector) { + auto expr = std::make_shared("struct_element", std::move(child_type)); + expr->add_child(parent); + expr->add_child(selector); + return expr; +} + +VExprSPtr int_gt(const VExprSPtr& left, int32_t value) { + auto expr = std::make_shared("gt", u8(), TExprNodeType::BINARY_PRED, + TExprOpcode::GT); + expr->add_child(left); + expr->add_child(literal(i32(), Field::create_field(value))); + return expr; +} + +VExprSPtr binary_predicate(TExprOpcode::type opcode, const VExprSPtr& left, + const VExprSPtr& right) { + auto expr = std::make_shared("binary_predicate", u8(), + TExprNodeType::BINARY_PRED, opcode); + expr->add_child(left); + expr->add_child(right); + return expr; +} + +VExprSPtr in_predicate(const VExprSPtr& probe, const DataTypePtr& literal_type, + const std::vector& values) { + auto expr = std::make_shared("in", u8(), TExprNodeType::IN_PRED); + expr->add_child(probe); + for (const auto& value : values) { + expr->add_child(literal(literal_type, value)); + } + return expr; +} + +VExprSPtr null_predicate(const VExprSPtr& child, bool is_null) { + auto expr = + std::make_shared(is_null ? "is_null_pred" : "is_not_null_pred", u8()); + expr->add_child(child); + return expr; +} + +VExprSPtr cast_expr(const VExprSPtr& child, DataTypePtr target_type) { + auto expr = Cast::create_shared(std::move(target_type)); + expr->add_child(child); + return expr; +} + +VExprSPtr compound_predicate(TExprOpcode::type opcode, const VExprSPtr& left, + const VExprSPtr& right) { + auto expr = std::make_shared("compound", u8(), TExprNodeType::COMPOUND_PRED, + opcode); + expr->add_child(left); + expr->add_child(right); + return expr; +} + +ColumnMapping mapped_struct_column(int32_t root_file_local_id, const std::string& child_name, + int32_t child_file_local_id, DataTypePtr child_type) { + ColumnDefinition file_child = name_col(child_name, child_type, child_file_local_id); + ColumnMapping root; + root.global_index = GlobalIndex(0); + root.table_column_name = "s"; + root.file_local_id = root_file_local_id; + root.file_column_name = "s"; + root.table_type = + std::make_shared(DataTypes {child_type}, Strings {child_name}); + root.file_type = root.table_type; + root.original_file_type = root.table_type; + root.original_file_children = {file_child}; + root.projected_file_children = {file_child}; + return root; +} + +std::vector collect_paths(const VExprSPtr& expr) { + std::vector paths; + collect_nested_struct_paths(expr, &paths); + return paths; +} + +void expect_name_selector(const StructChildSelector& selector, const std::string& name) { + EXPECT_TRUE(selector.by_name); + EXPECT_EQ(selector.name, name); +} + +void expect_ordinal_selector(const StructChildSelector& selector, size_t ordinal) { + EXPECT_FALSE(selector.by_name); + EXPECT_EQ(selector.ordinal, ordinal); +} + +void expect_path_root(const NestedStructPath& path, size_t global_index) { + EXPECT_EQ(path.root_global_index, GlobalIndex(global_index)); +} + +class ColumnMapperCastTest : public testing::Test { +protected: + void SetUp() override { state.set_enable_strict_cast(true); } + + Status prepare_open_execute(VExprContext* context, Block* block, int* result_column_id) { + RETURN_IF_ERROR(context->prepare(&state, RowDescriptor())); + RETURN_IF_ERROR(context->open(&state)); + return context->execute(block, result_column_id); + } + + MockRuntimeState state; +}; + +class Int64ChildGreaterThanExpr final : public VExpr { +public: + explicit Int64ChildGreaterThanExpr(int64_t value) + : VExpr(std::make_shared(), false), _value(value) {} + + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + ColumnPtr child_column; + RETURN_IF_ERROR( + get_child(0)->execute_column(context, block, selector, count, child_column)); + const auto& input = assert_cast(*child_column); + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + result_data[row] = input.get_element(row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + const std::string& expr_name() const override { return _expr_name; } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_value); + return Status::OK(); + } + +private: + const int64_t _value; + const std::string _expr_name = "Int64ChildGreaterThanExpr"; +}; + +class Int64BinaryPredicateExpr final : public VExpr { +public: + explicit Int64BinaryPredicateExpr(TExprOpcode::type opcode) + : VExpr(std::make_shared(), false) { + set_node_type(TExprNodeType::BINARY_PRED); + _opcode = opcode; + } + + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + ColumnPtr left_column; + RETURN_IF_ERROR(get_child(0)->execute_column(context, block, selector, count, left_column)); + ColumnPtr right_column; + RETURN_IF_ERROR( + get_child(1)->execute_column(context, block, selector, count, right_column)); + + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto left = left_column->get_int(row); + const auto right = right_column->get_int(row); + switch (_opcode) { + case TExprOpcode::GT: + result_data[row] = left > right; + break; + case TExprOpcode::LT: + result_data[row] = left < right; + break; + default: + return Status::InternalError("Unsupported test opcode {}", _opcode); + } + } + result_column = std::move(result); + return Status::OK(); + } + + const std::string& expr_name() const override { return _expr_name; } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_opcode); + return Status::OK(); + } + +private: + const std::string _expr_name = "Int64BinaryPredicateExpr"; +}; + +VExprSPtr create_in_predicate() { + TExprNode node; + node.__set_node_type(TExprNodeType::IN_PRED); + node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); + node.__set_is_nullable(false); + node.__set_num_children(0); + TInPredicate in_predicate; + in_predicate.__set_is_not_in(false); + node.__set_in_predicate(in_predicate); + return VInPredicate::create_shared(node); +} + +// ---------------------------------------------------------------------- +// L0 schema projection helper tests. +// These tests isolate LocalColumnIndex projection semantics before +// TableColumnMapper starts mutating ColumnMapping state. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperSchemaProjectionTest, ProjectsStructByLocalIdAndKeepsFileOrder) { + auto a = field_id_col("a", 101, i32(), 0); + auto b = field_id_col("b", 102, str(), 1); + auto root = struct_col("s", 100, {a, b}, 7); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(7); + projection.children.push_back(LocalColumnIndex::local(1)); + projection.children.push_back(LocalColumnIndex::local(0)); + + ColumnDefinition projected; + ASSERT_TRUE(project_column_definition(root, projection, &projected).ok()); + ASSERT_EQ(projected.children.size(), 2); + EXPECT_EQ(projected.children[0].name, "a"); + EXPECT_EQ(projected.children[1].name, "b"); + + const auto* projected_type = + assert_cast(remove_nullable(projected.type).get()); + ASSERT_EQ(projected_type->get_elements().size(), 2); + EXPECT_EQ(projected_type->get_element_name(0), "a"); + EXPECT_EQ(projected_type->get_element_name(1), "b"); +} + +TEST(ColumnMapperSchemaProjectionTest, ProjectsArrayElementStructLeaf) { + auto a = field_id_col("a", 1, i32(), 0); + auto b = field_id_col("b", 2, str(), 1); + auto element = struct_col("element", 10, {a, b}, 0); + auto array = array_col("items", 100, element, 5); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(5); + auto element_projection = LocalColumnIndex::partial_local(0); + element_projection.children.push_back(LocalColumnIndex::local(1)); + projection.children.push_back(std::move(element_projection)); + + ColumnDefinition projected; + ASSERT_TRUE(project_column_definition(array, projection, &projected).ok()); + ASSERT_EQ(projected.children.size(), 1); + ASSERT_EQ(projected.children[0].children.size(), 1); + EXPECT_EQ(projected.children[0].children[0].name, "b"); + + const auto* array_type = + assert_cast(remove_nullable(projected.type).get()); + const auto* element_type = assert_cast( + remove_nullable(array_type->get_nested_type()).get()); + ASSERT_EQ(element_type->get_elements().size(), 1); + EXPECT_EQ(element_type->get_element_name(0), "b"); +} + +TEST(ColumnMapperSchemaProjectionTest, ProjectsMapValueStructLeaf) { + auto key = field_id_col("key", 1, str(), 0); + auto value_a = field_id_col("a", 2, i32(), 0); + auto value_b = field_id_col("b", 3, str(), 1); + auto value_type = + std::make_shared(DataTypes {i32(), str()}, Strings {"a", "b"}); + ColumnDefinition value = field_id_col("value", 4, value_type, 1); + value.children = {value_a, value_b}; + auto map = map_col("m", 100, {key, value}, str(), value_type, 9); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(9); + projection.children.push_back(LocalColumnIndex::local(0)); + auto value_projection = LocalColumnIndex::partial_local(1); + value_projection.children.push_back(LocalColumnIndex::local(1)); + projection.children.push_back(std::move(value_projection)); + + ColumnDefinition projected; + ASSERT_TRUE(project_column_definition(map, projection, &projected).ok()); + ASSERT_EQ(projected.children.size(), 2); + EXPECT_EQ(projected.children[0].name, "key"); + EXPECT_TRUE(projected.children[0].children.empty()); + EXPECT_EQ(projected.children[1].name, "value"); + ASSERT_EQ(projected.children[1].children.size(), 1); + EXPECT_EQ(projected.children[1].children[0].name, "b"); + + const auto* map_type = assert_cast(remove_nullable(projected.type).get()); + const auto* projected_value = + assert_cast(remove_nullable(map_type->get_value_type()).get()); + ASSERT_EQ(projected_value->get_elements().size(), 1); + EXPECT_EQ(projected_value->get_element_name(0), "b"); +} + +TEST(ColumnMapperSchemaProjectionTest, RejectsMapKeyOnlyProjection) { + auto key = field_id_col("key", 1, str(), 0); + auto value = field_id_col("value", 2, i32(), 1); + auto map = map_col("m", 100, {key, value}, str(), i32(), 9); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(9); + projection.children.push_back(LocalColumnIndex::local(0)); + + ColumnDefinition projected; + const auto status = project_column_definition(map, projection, &projected); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains no value child"), std::string::npos); +} + +TEST(ColumnMapperSchemaProjectionTest, RejectsInvalidProjectionChildIdWithFieldName) { + auto root = struct_col("s", 100, {field_id_col("a", 101, i32(), 0)}, 7); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(7); + projection.children.push_back(LocalColumnIndex::local(99)); + + ColumnDefinition projected; + const auto status = project_column_definition(root, projection, &projected); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Invalid projection child id 99 for field s"), + std::string::npos); +} + +TEST(ColumnMapperSchemaProjectionTest, RejectsEmptyProjectionPathWithFieldName) { + auto root = struct_col("s", 100, {field_id_col("a", 101, i32(), 0)}, 7); + + LocalColumnIndex projection = LocalColumnIndex::partial_local(7); + projection.children.push_back(LocalColumnIndex::local(-1)); + + ColumnDefinition projected; + const auto status = project_column_definition(root, projection, &projected); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Empty projection path for field s"), std::string::npos); +} + +TEST(ColumnMapperSchemaProjectionTest, RejectsInvalidChildProjectionForPrimitiveField) { + auto root = field_id_col("i", 1, i32(), 7); + LocalColumnIndex projection = LocalColumnIndex::partial_local(7); + projection.children.push_back(LocalColumnIndex::local(0)); + + ColumnDefinition projected; + const auto status = project_column_definition(root, projection, &projected); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Invalid projection child id 0 for field i"), + std::string::npos); +} + +// ---------------------------------------------------------------------- +// L0 nested helper tests. +// These tests cover child ordering, direct schema path resolution, and +// predicate-filter merging without going through create_scan_request(). +// ---------------------------------------------------------------------- + +TEST(ColumnMapperNestedHelperTest, PresentChildMappingsAreSortedByFileLocalId) { + ColumnMapping b; + b.table_column_name = "b"; + b.file_local_id = 2; + ColumnMapping missing; + missing.table_column_name = "missing"; + ColumnMapping a; + a.table_column_name = "a"; + a.file_local_id = 1; + + const std::vector child_mappings = {b, missing, a}; + const auto present = present_child_mappings_in_file_order(child_mappings); + ASSERT_EQ(present.size(), 2); + EXPECT_EQ(present[0]->table_column_name, "a"); + EXPECT_EQ(present[1]->table_column_name, "b"); +} + +TEST(ColumnMapperNestedHelperTest, BuildsProjectionByNameAndOrdinalSelectors) { + auto leaf = field_id_col("leaf", 3, i32(), 0); + auto nested = struct_col("nested", 2, {leaf}, 1); + auto first = field_id_col("first", 1, str(), 0); + const std::vector children = {first, nested}; + + const std::vector by_name = { + {.by_name = true, .name = "nested", .ordinal = 0}, + {.by_name = true, .name = "leaf", .ordinal = 0}, + }; + LocalColumnIndex named_projection; + ASSERT_TRUE(build_file_child_projection_from_schema(children, by_name, &named_projection).ok()); + EXPECT_EQ(named_projection.local_id(), 1); + ASSERT_EQ(named_projection.children.size(), 1); + EXPECT_EQ(named_projection.children[0].local_id(), 0); + + const std::vector by_ordinal = { + {.by_name = false, .name = "", .ordinal = 2}, + {.by_name = false, .name = "", .ordinal = 1}, + }; + LocalColumnIndex ordinal_projection; + ASSERT_TRUE(build_file_child_projection_from_schema(children, by_ordinal, &ordinal_projection) + .ok()); + EXPECT_EQ(ordinal_projection.local_id(), 1); + ASSERT_EQ(ordinal_projection.children.size(), 1); + EXPECT_EQ(ordinal_projection.children[0].local_id(), 0); +} + +TEST(ColumnMapperNestedHelperTest, MergesPredicateFiltersForSameNestedTarget) { + FileColumnPredicateFilter gt_filter; + gt_filter.target = FileNestedPredicateTarget( + LocalColumnId(7), std::make_unique(2, "score")); + gt_filter.file_column_id = LocalColumnId(7); + gt_filter.file_child_id_path = {2}; + gt_filter.predicates.push_back(create_comparison_predicate( + 7, "score", i32(), Field::create_field(10), false)); + + FileColumnPredicateFilter lt_filter; + lt_filter.target = FileNestedPredicateTarget( + LocalColumnId(7), std::make_unique(2, "score")); + lt_filter.file_column_id = LocalColumnId(7); + lt_filter.file_child_id_path = {2}; + lt_filter.predicates.push_back(create_comparison_predicate( + 7, "score", i32(), Field::create_field(100), false)); + + std::vector filters; + merge_column_predicate_filter(std::move(gt_filter), &filters); + merge_column_predicate_filter(std::move(lt_filter), &filters); + + ASSERT_EQ(filters.size(), 1); + EXPECT_EQ(filters[0].effective_file_column_id(), LocalColumnId(7)); + EXPECT_EQ(filters[0].effective_file_child_id_path(), std::vector({2})); + ASSERT_EQ(filters[0].predicates.size(), 2); + EXPECT_EQ(target_names(filters[0].target.struct_target.get()), + std::vector({"score"})); +} + +TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersFromOr) { + const auto int_type = i32(); + const auto struct_type = std::make_shared(DataTypes {int_type}, Strings {"a"}); + const auto slot = table_slot(0, 0, struct_type, "s"); + const auto left = int_gt(struct_element(slot, int_type, "a"), 10); + const auto right = int_gt(struct_element(slot, int_type, "a"), 20); + const auto or_expr = compound_predicate(TExprOpcode::COMPOUND_OR, left, right); + + std::vector filters; + collect_nested_column_predicate_filters(or_expr, {mapped_struct_column(5, "a", 0, int_type)}, + &filters); + + EXPECT_TRUE(filters.empty()); +} + +TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersFromUnsupportedExpression) { + const auto int_type = i32(); + const auto struct_type = std::make_shared(DataTypes {int_type}, Strings {"a"}); + auto add_expr = std::make_shared("add", int_type); + add_expr->add_child(struct_element(table_slot(0, 0, struct_type, "s"), int_type, "a")); + add_expr->add_child(literal(int_type, Field::create_field(1))); + + std::vector filters; + collect_nested_column_predicate_filters(add_expr, {mapped_struct_column(5, "a", 0, int_type)}, + &filters); + + EXPECT_TRUE(filters.empty()); +} + +TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersThroughUnsafeCast) { + const auto file_type = i64(); + const auto table_type = i32(); + const auto struct_type = std::make_shared(DataTypes {file_type}, Strings {"a"}); + const auto nested_leaf = struct_element(table_slot(0, 0, struct_type, "s"), file_type, "a"); + const auto filter_expr = int_gt(cast_expr(nested_leaf, table_type), 10); + + std::vector filters; + collect_nested_column_predicate_filters(filter_expr, + {mapped_struct_column(5, "a", 0, file_type)}, &filters); + + EXPECT_TRUE(filters.empty()); +} + +// ---------------------------------------------------------------------- +// collect_nested_struct_paths() helper tests. +// These tests assert the entry helper for nested scan projection: it only discovers +// table-side struct paths. Later localization decides whether to build pruning predicates. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperCollectNestedStructPathsTest, CollectsNameOrdinalAndBooleanSelectors) { + const auto leaf_type = i32(); + const auto inner_type = + std::make_shared(DataTypes {leaf_type, leaf_type}, Strings {"x", "y"}); + const auto root_type = std::make_shared(DataTypes {inner_type, leaf_type}, + Strings {"nested", "missing"}); + const auto root = table_slot(0, 3, root_type, "s"); + + const auto nested_by_ordinal = struct_element_by_selector( + struct_element_by_selector(root, inner_type, + literal(i32(), Field::create_field(1))), + leaf_type, literal(i32(), Field::create_field(2))); + auto paths = collect_paths(nested_by_ordinal); + ASSERT_EQ(paths.size(), 1); + expect_path_root(paths[0], 3); + ASSERT_EQ(paths[0].selectors.size(), 2); + expect_ordinal_selector(paths[0].selectors[0], 1); + expect_ordinal_selector(paths[0].selectors[1], 2); + + const std::vector positive_ordinal_selectors = { + literal(std::make_shared(), + Field::create_field(static_cast(1))), + literal(std::make_shared(), + Field::create_field(static_cast(2))), + literal(i32(), Field::create_field(3)), + literal(i64(), Field::create_field(4)), + literal(u8(), Field::create_field(true)), + }; + for (size_t idx = 0; idx < positive_ordinal_selectors.size(); ++idx) { + const auto selected = + struct_element_by_selector(root, leaf_type, positive_ordinal_selectors[idx]); + paths = collect_paths(selected); + ASSERT_EQ(paths.size(), 1); + ASSERT_EQ(paths[0].selectors.size(), 1); + expect_ordinal_selector(paths[0].selectors[0], idx == 4 ? 1 : idx + 1); + } + + paths = collect_paths(struct_element(root, leaf_type, "missing")); + ASSERT_EQ(paths.size(), 1); + ASSERT_EQ(paths[0].selectors.size(), 1); + expect_name_selector(paths[0].selectors[0], "missing"); +} + +TEST(ColumnMapperCollectNestedStructPathsTest, IgnoresInvalidSelectorsAndNonPathRoots) { + const auto leaf_type = i32(); + const auto root_type = std::make_shared(DataTypes {leaf_type}, Strings {"a"}); + const auto root = table_slot(0, 0, root_type, "s"); + + const std::vector invalid_selectors = { + literal(i32(), Field::create_field(0)), + literal(i32(), Field::create_field(-1)), + literal(u8(), Field::create_field(false)), + literal(f32(), Field::create_field(1.0F)), + literal(f64(), Field::create_field(1.0)), + table_slot(1, 1, i32(), "selector"), + }; + for (const auto& selector : invalid_selectors) { + EXPECT_TRUE(collect_paths(struct_element_by_selector(root, leaf_type, selector)).empty()); + } + + auto wrong_arity = std::make_shared("struct_element", leaf_type); + wrong_arity->add_child(root); + EXPECT_TRUE(collect_paths(wrong_arity).empty()); + + auto not_struct_element = std::make_shared("other_function", leaf_type); + not_struct_element->add_child(root); + not_struct_element->add_child(literal(str(), Field::create_field("a"))); + EXPECT_TRUE(collect_paths(not_struct_element).empty()); + + EXPECT_TRUE(collect_paths(struct_element(literal(str(), Field::create_field("x")), + leaf_type, "a")) + .empty()); + EXPECT_TRUE(collect_paths(nullptr).empty()); +} + +TEST(ColumnMapperCollectNestedStructPathsTest, RecursesThroughExpressionsAndKeepsCompletePath) { + const auto leaf_type = i32(); + const auto inner_type = std::make_shared(DataTypes {leaf_type}, Strings {"b"}); + const auto root_type = + std::make_shared(DataTypes {inner_type, leaf_type}, Strings {"a", "c"}); + const auto root = table_slot(0, 2, root_type, "s"); + const auto path_a = struct_element_by_selector( + root, inner_type, literal(str(), Field::create_field("a"))); + const auto path_ab = struct_element_by_selector( + path_a, leaf_type, literal(str(), Field::create_field("b"))); + const auto path_c = struct_element_by_selector( + root, leaf_type, literal(str(), Field::create_field("c"))); + + auto paths = collect_paths(binary_predicate( + TExprOpcode::GT, path_ab, literal(leaf_type, Field::create_field(1)))); + ASSERT_EQ(paths.size(), 1); + expect_path_root(paths[0], 2); + ASSERT_EQ(paths[0].selectors.size(), 2); + expect_name_selector(paths[0].selectors[0], "a"); + expect_name_selector(paths[0].selectors[1], "b"); + + paths = collect_paths(compound_predicate( + TExprOpcode::COMPOUND_OR, + binary_predicate(TExprOpcode::GT, path_ab, + literal(leaf_type, Field::create_field(1))), + binary_predicate(TExprOpcode::LT, path_c, + literal(leaf_type, Field::create_field(2))))); + ASSERT_EQ(paths.size(), 2); + ASSERT_EQ(paths[0].selectors.size(), 2); + ASSERT_EQ(paths[1].selectors.size(), 1); + expect_name_selector(paths[0].selectors[0], "a"); + expect_name_selector(paths[0].selectors[1], "b"); + expect_name_selector(paths[1].selectors[0], "c"); + + auto fn = std::make_shared("fn", leaf_type); + fn->add_child(path_ab); + fn->add_child(table_slot(3, 4, leaf_type, "other")); + paths = collect_paths(fn); + ASSERT_EQ(paths.size(), 1); + ASSERT_EQ(paths[0].selectors.size(), 2); + + auto if_expr = std::make_shared("if", leaf_type); + if_expr->add_child(literal(u8(), Field::create_field(true))); + if_expr->add_child(path_ab); + if_expr->add_child(path_c); + paths = collect_paths(if_expr); + ASSERT_EQ(paths.size(), 2); + + paths = collect_paths(compound_predicate(TExprOpcode::COMPOUND_AND, path_ab, path_ab)); + ASSERT_EQ(paths.size(), 2); + + paths = collect_paths(path_ab); + ASSERT_EQ(paths.size(), 1); + ASSERT_EQ(paths[0].selectors.size(), 2); +} + +TEST(ColumnMapperCollectNestedStructPathsTest, CastBehaviorSeparatesProjectionAndPruningRules) { + const auto int_type = i32(); + const auto bigint_type = i64(); + const auto float_type = f32(); + const auto double_type = f64(); + const auto decimal_small = dec32(8, 2); + const auto decimal_wide = dec32(9, 2); + const auto decimal_changed_scale = dec32(9, 3); + + const auto root_type = std::make_shared( + DataTypes {int_type, float_type, decimal_small}, Strings {"i", "f", "d"}); + const auto root = table_slot(0, 0, root_type, "s"); + const auto int_path = struct_element(root, int_type, "i"); + const auto float_path = struct_element(root, float_type, "f"); + const auto decimal_path = struct_element(root, decimal_small, "d"); + + auto paths = collect_paths(cast_expr(int_path, bigint_type)); + ASSERT_EQ(paths.size(), 1); + expect_name_selector(paths[0].selectors[0], "i"); + + paths = collect_paths(cast_expr(float_path, double_type)); + ASSERT_EQ(paths.size(), 1); + expect_name_selector(paths[0].selectors[0], "f"); + + paths = collect_paths(cast_expr(decimal_path, decimal_wide)); + ASSERT_EQ(paths.size(), 1); + expect_name_selector(paths[0].selectors[0], "d"); + + paths = collect_paths( + cast_expr(struct_element(root, make_nullable(int_type), "i"), make_nullable(int_type))); + ASSERT_EQ(paths.size(), 1); + expect_name_selector(paths[0].selectors[0], "i"); + + // Unsafe casts are not accepted as pruning paths, but collect_nested_struct_paths() still + // recurses into children so scan projection can read the column needed by row-level filters. + paths = collect_paths(cast_expr(struct_element(root, bigint_type, "i"), int_type)); + ASSERT_EQ(paths.size(), 1); + expect_name_selector(paths[0].selectors[0], "i"); + + paths = collect_paths(cast_expr(decimal_path, decimal_changed_scale)); + ASSERT_EQ(paths.size(), 1); + expect_name_selector(paths[0].selectors[0], "d"); + + EXPECT_TRUE(collect_paths(cast_expr(table_slot(1, 1, int_type, "plain"), bigint_type)).empty()); +} + +TEST(ColumnMapperCollectNestedStructPathsTest, ProjectionMergeKeepsFilterOnlyPathAndDeduplicates) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = name_col("a", int_type); + auto table_b = name_col("b", int_type); + auto table_output = struct_name_col("s", {table_a}); + auto full_table_struct = struct_name_col("s", {table_a, table_b}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", int_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_output}, {}, {file_struct}).ok()); + + const auto path_b = + struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "b"); + auto filter_expr = compound_predicate( + TExprOpcode::COMPOUND_AND, + binary_predicate(TExprOpcode::GT, path_b, + literal(int_type, Field::create_field(1))), + binary_predicate(TExprOpcode::LT, path_b, + literal(int_type, Field::create_field(10)))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_output}, &request).ok()); + + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5)); + ASSERT_FALSE(request.predicate_columns[0].project_all_children); + EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector({0, 1})); + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({1})); + ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 2); +} + +// Scenario: row-oriented readers such as CSV/Text cannot lazy-read predicate columns separately. +// For a complex root that is both projected and referenced by a filter, the materialized mapper +// keeps one non-predicate scan entry and asks the reader to read the full top-level struct. +TEST(ColumnMapperScanRequestTest, MaterializedMapperUsesSingleScanColumnList) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = name_col("a", int_type, 0); + auto table_b = name_col("b", int_type, 1); + auto full_table_struct = struct_name_col("s", {table_a, table_b}); + auto table_output = struct_name_col("s", {table_a}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", int_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5); + + MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_output}, {}, {file_struct}).ok()); + + const auto path_b = + struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "b"); + auto filter_expr = binary_predicate(TExprOpcode::GT, path_b, + literal(int_type, Field::create_field(1))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_output}, &request).ok()); + + EXPECT_TRUE(request.predicate_columns.empty()); + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(5)); + EXPECT_TRUE(request.non_predicate_columns[0].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[0].children.empty()); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// Scenario: a FileReader must expose semantic children for complex file columns. If it returns a +// complex DataType but leaves ColumnDefinition::children empty, mapper should return a diagnostic +// error instead of aborting inside ARRAY/MAP/STRUCT child lookup. +TEST(ColumnMapperScanRequestTest, MalformedComplexFileSchemaReturnsError) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = name_col("a", int_type, 0); + auto table_b = name_col("b", string_type, 1); + auto table_struct = struct_name_col("s", {table_a, table_b}); + auto file_struct_type = + std::make_shared(DataTypes {int_type, string_type}, Strings {"a", "b"}); + auto malformed_file_struct = name_col("s", file_struct_type, 5); + + MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + const auto status = mapper.create_mapping({table_struct}, {}, {malformed_file_struct}); + + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Malformed complex file schema"), std::string::npos) + << status; +} + +// Scenario: when the projected table schema contains the child referenced by the filter, the +// materialized mapper can still rewrite the table-level struct child predicate into a file-local +// conjunct. It remains a single full-root scan column; only the expression is localized. +TEST(ColumnMapperScanRequestTest, MaterializedMapperLocalizesMappedStructChildConjunct) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = name_col("a", int_type, 0); + auto table_b = name_col("b", int_type, 1); + auto table_struct = struct_name_col("s", {table_a, table_b}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", int_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5); + + MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + const auto path_b = struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "b"); + auto filter_expr = binary_predicate(TExprOpcode::GT, path_b, + literal(int_type, Field::create_field(1))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + EXPECT_TRUE(request.predicate_columns.empty()); + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(5)); + EXPECT_TRUE(request.non_predicate_columns[0].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[0].children.empty()); + EXPECT_TRUE(request.column_predicate_filters.empty()); + ASSERT_EQ(request.conjuncts.size(), 1); +} + +// Scenario: even output-only partial complex projections such as `SELECT s.a` must scan the full +// top-level struct for materialized readers, because delimited text formats cannot physically read +// only one nested child from a single text field. +TEST(ColumnMapperScanRequestTest, MaterializedMapperScansFullComplexRootForOutputOnlyProjection) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = name_col("a", int_type, 0); + auto table_output = struct_name_col("s", {table_a}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", int_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b, name_col("c", string_type, 2)}, 5); + + MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_output}, {}, {file_struct}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_output}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(5)); + EXPECT_TRUE(request.non_predicate_columns[0].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[0].children.empty()); + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// Scenario: array/map nested projections also scan the full top-level complex root for +// materialized readers. This keeps row-oriented formats from receiving Parquet-style partial +// projections for `array` elements or map value structs. +TEST(ColumnMapperScanRequestTest, MaterializedMapperScansFullArrayAndMapRoots) { + const auto key_type = str(); + const auto int_type = i32(); + const auto string_type = str(); + + auto table_array_child = name_col("b", string_type); + auto table_array_element = struct_name_col("element", {table_array_child}); + auto table_array = array_col("items", -1, table_array_element); + table_array.identifier = Field::create_field("items"); + set_name_identifiers(&table_array, -1); + + auto file_array_a = name_col("a", int_type, 0); + auto file_array_b = name_col("b", string_type, 1); + auto file_array_element = struct_name_col("element", {file_array_a, file_array_b}, 0); + auto file_array = array_col("items", -1, file_array_element, 4); + file_array.identifier = Field::create_field("items"); + set_name_identifiers(&file_array, 4); + + auto table_value_b = name_col("b", string_type); + auto table_value = struct_name_col("value", {table_value_b}); + auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type); + table_map.identifier = Field::create_field("m"); + set_name_identifiers(&table_map, -1); + + auto file_key = name_col("key", key_type, 0); + auto file_value_a = name_col("a", int_type, 0); + auto file_value_b = name_col("b", string_type, 1); + auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1); + auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 6); + file_map.identifier = Field::create_field("m"); + set_name_identifiers(&file_map, 6); + + MaterializedColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_array, table_map}, {}, {file_array, file_map}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array, table_map}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 2); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(4)); + EXPECT_TRUE(request.non_predicate_columns[0].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[0].children.empty()); + EXPECT_EQ(request.non_predicate_columns[1].column_id(), LocalColumnId(6)); + EXPECT_TRUE(request.non_predicate_columns[1].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[1].children.empty()); + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// ---------------------------------------------------------------------- +// L1 create_mapping root matching tests. +// These cases cover the three supported root matching modes and the +// missing/default behavior that each mode feeds into later scan requests. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperCreateMappingTest, ByNameMatchesCaseIdentifierAndAliases) { + const auto int_type = i32(); + const std::vector table_schema = { + name_col("ID", int_type), + name_id_col("renamed", "legacy_name", int_type), + [] { + auto column = name_col("current_alias", i32()); + column.name_mapping = {"old_alias"}; + return column; + }(), + name_col("file_alias", int_type), + }; + std::vector file_schema = { + name_col("id", int_type, 0), + name_col("legacy_name", int_type, 1), + name_col("old_alias", int_type, 2), + [] { + auto column = name_col("physical_name", i32(), 3); + column.name_mapping = {"file_alias"}; + return column; + }(), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 4); + expect_mapping(mapper.mappings()[0], 0, "ID", 0, "id", int_type, int_type); + expect_mapping(mapper.mappings()[1], 1, "renamed", 1, "legacy_name", int_type, int_type); + expect_mapping(mapper.mappings()[2], 2, "current_alias", 2, "old_alias", int_type, int_type); + expect_mapping(mapper.mappings()[3], 3, "file_alias", 3, "physical_name", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, ByNameUsesFirstMatchingFileFieldWhenAmbiguous) { + const auto int_type = i32(); + const std::vector table_schema = { + name_col("id", int_type), + }; + const std::vector file_schema = { + name_col("ID", int_type, 0), + name_col("id", int_type, 1), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "id", 0, "ID", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, TimestampTzScaleMismatchDoesNotAddFinalizeCast) { + // Scenario: HDFS TVF may expose a table slot as TIMESTAMPTZ(0), while a Parquet logical UTC + // timestamp file schema is materialized as TIMESTAMPTZ(6). Finalization must not add a SQL + // cast from scale 6 to scale 0, because that cast rounds fractional seconds: + // 2025-06-01 12:34:56.789+08:00 -> 2025-06-01 12:34:57+08:00 + // Reader finalization should pass the column through; the output slot type controls display + // scale and hides the fractional part without changing the stored instant. + const auto table_type = timestamptz(0); + const auto file_type = timestamptz(6); + const std::vector table_schema = {name_col("ts_tz", table_type)}; + const std::vector file_schema = {name_col("ts_tz", file_type, 0)}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "ts_tz", 0, "ts_tz", file_type, table_type); + EXPECT_TRUE(mapper.mappings()[0].is_trivial); + EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::COPY_DIRECTLY); +} + +TEST(ColumnMapperCreateMappingTest, ByNameUsesNameMappingForRenamedColumn) { + const auto int_type = i32(); + auto table_column = name_col("current_id", int_type); + table_column.name_mapping = {"legacy_id"}; + const std::vector file_schema = { + name_col("legacy_id", int_type, 0), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_column}, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "current_id", 0, "legacy_id", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, ByNameUsesNameMappingForNestedSchemaEvolution) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_country = name_col("country", string_type); + table_country.name_mapping = {"old_country"}; + auto table_city = name_col("city", string_type); + auto table_struct = struct_name_col("struct_column", {table_country, table_city}); + set_name_identifiers(&table_struct, -1); + + auto table_item = name_col("item", string_type); + table_item.name_mapping = {"product"}; + auto table_quantity = name_col("quantity", int_type); + auto table_element = struct_name_col("element", {table_item, table_quantity}); + auto table_array = array_col("array_column", -1, table_element); + set_name_identifiers(&table_array, -1); + + auto table_key = name_col("key", string_type); + auto table_full_name = name_col("full_name", string_type); + table_full_name.name_mapping = {"name"}; + auto table_age = name_col("age", int_type); + auto table_value = struct_name_col("value", {table_full_name, table_age}); + auto table_map = + map_col("new_map_column", -1, {table_key, table_value}, string_type, table_value.type); + table_map.name_mapping = {"map_column"}; + set_name_identifiers(&table_map, -1); + + auto file_old_country = name_col("old_country", string_type, 0); + auto file_city = name_col("city", string_type, 1); + auto file_struct = struct_name_col("struct_column", {file_old_country, file_city}, 3); + set_name_identifiers(&file_struct, 3); + + auto file_product = name_col("product", string_type, 0); + auto file_element = struct_name_col("list", {file_product}, 0); + auto file_array = array_col("array_column", -1, file_element, 4); + set_name_identifiers(&file_array, 4); + + auto file_key = name_col("key", string_type, 0); + auto file_name = name_col("name", string_type, 0); + auto file_age = name_col("age", int_type, 1); + auto file_value = struct_name_col("value", {file_name, file_age}, 1); + auto file_map = + map_col("map_column", -1, {file_key, file_value}, string_type, file_value.type, 5); + set_name_identifiers(&file_map, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct, table_array, table_map}, {}, + {file_struct, file_array, file_map}) + .ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + const auto& struct_mapping = mapper.mappings()[0]; + expect_mapping(struct_mapping, 0, "struct_column", 3, "struct_column", file_struct.type, + table_struct.type); + ASSERT_EQ(struct_mapping.child_mappings.size(), 2); + EXPECT_EQ(struct_mapping.child_mappings[0].file_column_name, "old_country"); + EXPECT_EQ(*struct_mapping.child_mappings[0].file_local_id, 0); + EXPECT_EQ(struct_mapping.child_mappings[1].file_column_name, "city"); + EXPECT_EQ(*struct_mapping.child_mappings[1].file_local_id, 1); + + const auto& array_mapping = mapper.mappings()[1]; + expect_mapping(array_mapping, 1, "array_column", 4, "array_column", file_array.type, + table_array.type); + ASSERT_EQ(array_mapping.child_mappings.size(), 1); + const auto& element_mapping = array_mapping.child_mappings[0]; + EXPECT_EQ(element_mapping.file_column_name, "list"); + EXPECT_EQ(*element_mapping.file_local_id, 0); + ASSERT_EQ(element_mapping.child_mappings.size(), 2); + EXPECT_EQ(element_mapping.child_mappings[0].file_column_name, "product"); + EXPECT_EQ(*element_mapping.child_mappings[0].file_local_id, 0); + expect_missing(element_mapping.child_mappings[1]); + + const auto& map_mapping = mapper.mappings()[2]; + expect_mapping(map_mapping, 2, "new_map_column", 5, "map_column", file_map.type, + table_map.type); + ASSERT_EQ(map_mapping.child_mappings.size(), 2); + EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key"); + EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0); + const auto& value_mapping = map_mapping.child_mappings[1]; + EXPECT_EQ(value_mapping.file_column_name, "value"); + EXPECT_EQ(*value_mapping.file_local_id, 1); + ASSERT_EQ(value_mapping.child_mappings.size(), 2); + EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "name"); + EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 0); + EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "age"); + EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 1); +} + +// Scenario: SELECT * can carry only the full complex DataType without expanded nested +// ColumnDefinitions. When an old file has map value STRUCT and the table type is +// STRUCT, the mapper must still build child mappings instead of letting +// TableReader cast between incompatible struct shapes. +TEST(ColumnMapperCreateMappingTest, SynthesizesMissingMapValueStructChildrenFromType) { + const auto int_type = i32(); + const auto string_type = str(); + const auto table_value_type = std::make_shared( + DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"}); + const auto file_value_type = std::make_shared(DataTypes {int_type, string_type}, + Strings {"age", "name"}); + + auto table_map = name_col("new_map_column", + std::make_shared(string_type, table_value_type)); + table_map.name_mapping = {"map_column"}; + set_name_identifiers(&table_map, -1); + + auto file_age = name_col("age", int_type, 0); + auto file_name = name_col("name", string_type, 1); + auto file_value = struct_name_col("value", {file_age, file_name}, 1); + auto file_key = name_col("key", string_type, 0); + auto file_map = + map_col("map_column", -1, {file_key, file_value}, string_type, file_value_type, 5); + set_name_identifiers(&file_map, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + const auto& map_mapping = mapper.mappings()[0]; + ASSERT_EQ(map_mapping.child_mappings.size(), 2); + EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key"); + EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key"); + EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0); + + const auto& value_mapping = map_mapping.child_mappings[1]; + EXPECT_EQ(value_mapping.table_column_name, "value"); + EXPECT_EQ(value_mapping.file_column_name, "value"); + EXPECT_EQ(*value_mapping.file_local_id, 1); + ASSERT_EQ(value_mapping.child_mappings.size(), 3); + EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age"); + EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age"); + EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 0); + EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name"); + EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name"); + EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 1); + EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender"); + expect_missing(value_mapping.child_mappings[2]); + EXPECT_FALSE(value_mapping.is_trivial); +} + +// Scenario: MAP_KEYS(new_map_column) may build a key-only nested projection, while SELECT * still +// needs the whole map root. The mapper must add a synthetic value child and recursively map the old +// value struct instead of treating Struct(name, age) as a leaf to CAST into the table value struct. +TEST(ColumnMapperCreateMappingTest, KeyOnlyMapProjectionStillMapsEvolvedValueStruct) { + const auto int_type = i32(); + const auto string_type = str(); + const auto table_value_type = std::make_shared( + DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"}); + const auto file_value_type = std::make_shared(DataTypes {string_type, int_type}, + Strings {"name", "age"}); + + auto table_key = name_col("key", string_type); + auto table_map = map_col("new_map_column", -1, {table_key}, string_type, table_value_type); + table_map.name_mapping = {"map_column"}; + set_name_identifiers(&table_map, -1); + + auto file_key = name_col("key", string_type, 0); + auto file_name = name_col("name", string_type, 0); + auto file_age = name_col("age", int_type, 1); + auto file_value = struct_name_col("value", {file_name, file_age}, 1); + auto file_map = + map_col("map_column", -1, {file_key, file_value}, string_type, file_value_type, 5); + set_name_identifiers(&file_map, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + const auto& map_mapping = mapper.mappings()[0]; + ASSERT_EQ(map_mapping.child_mappings.size(), 2); + EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key"); + EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key"); + EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0); + + const auto& value_mapping = map_mapping.child_mappings[1]; + EXPECT_EQ(value_mapping.table_column_name, "value"); + EXPECT_EQ(value_mapping.file_column_name, "value"); + EXPECT_EQ(*value_mapping.file_local_id, 1); + ASSERT_EQ(value_mapping.child_mappings.size(), 3); + EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age"); + EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age"); + EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 1); + EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name"); + EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name"); + EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 0); + EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender"); + expect_missing(value_mapping.child_mappings[2]); + EXPECT_FALSE(value_mapping.is_trivial); +} + +// Scenario: Iceberg uses field-id mapping, but a key-only map projection may force the mapper to +// synthesize the missing value struct from DataType names, which do not carry field ids. The mapper +// must name-match synthesized children before ordinal fallback, otherwise `age` would read old +// file child `name` and the later materialization would build the value struct incorrectly. +TEST(ColumnMapperCreateMappingTest, + KeyOnlyMapProjectionSynthesizedValueStructNameMatchesBeforeOrdinalFallback) { + const auto int_type = i32(); + const auto string_type = str(); + const auto table_value_type = std::make_shared( + DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"}); + const auto file_value_type = std::make_shared(DataTypes {string_type, int_type}, + Strings {"name", "age"}); + + auto table_key = field_id_col("key", 10, string_type, 0); + auto table_map = map_col("new_map_column", 2, {table_key}, string_type, table_value_type); + + auto file_key = field_id_col("key", 10, string_type, 0); + auto file_name = field_id_col("name", 7, string_type, 0); + auto file_age = field_id_col("age", 8, int_type, 1); + auto file_value = struct_col("value", 11, {file_name, file_age}, 1); + auto file_map = + map_col("new_map_column", 2, {file_key, file_value}, string_type, file_value_type, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + const auto& map_mapping = mapper.mappings()[0]; + ASSERT_EQ(map_mapping.child_mappings.size(), 2); + EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key"); + EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key"); + EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0); + + const auto& value_mapping = map_mapping.child_mappings[1]; + EXPECT_EQ(value_mapping.table_column_name, "value"); + EXPECT_EQ(value_mapping.file_column_name, "value"); + EXPECT_EQ(*value_mapping.file_local_id, 1); + ASSERT_EQ(value_mapping.child_mappings.size(), 3); + EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age"); + EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age"); + EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 1); + EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name"); + EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name"); + EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 0); + EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender"); + expect_missing(value_mapping.child_mappings[2]); + EXPECT_FALSE(value_mapping.is_trivial); +} + +TEST(ColumnMapperCreateMappingTest, ByFieldIdDoesNotFallbackToNameAndUsesFirstDuplicate) { + const auto int_type = i32(); + const std::vector table_schema = { + field_id_col("renamed", 10, int_type), + name_col("same_name", int_type), + field_id_col("negative", -7, int_type), + }; + const std::vector file_schema = { + field_id_col("first", 10, int_type, 0), + field_id_col("second", 10, int_type, 1), + field_id_col("same_name", 99, int_type, 2), + field_id_col("negative_file", -7, int_type, 3), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + expect_mapping(mapper.mappings()[0], 0, "renamed", 0, "first", int_type, int_type); + expect_missing(mapper.mappings()[1]); + expect_mapping(mapper.mappings()[2], 2, "negative", 3, "negative_file", int_type, int_type); +} + +// Scenario: Iceberg TopN lazy materialization uses BY_FIELD_ID for schema evolution and also asks +// the file reader to synthesize GLOBAL_ROWID. GLOBAL_ROWID is matched by ColumnType before the +// field-id matcher, so keeping BY_FIELD_ID does not make the mapper look for a numeric field id for +// that virtual column. +TEST(ColumnMapperCreateMappingTest, ByFieldIdMapsGlobalRowIdByVirtualColumnType) { + const auto int_type = i32(); + auto table_rowid = global_rowid_column_definition(); + table_rowid.name = BeConsts::GLOBAL_ROWID_COL + "equality_delete_par_1"; + table_rowid.identifier = Field::create_field(table_rowid.name); + + const std::vector table_schema = { + field_id_col("new_new_id", 1, int_type), + table_rowid, + }; + const std::vector file_schema = { + field_id_col("id", 1, int_type, 0), + global_rowid_column_definition(), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 2); + expect_mapping(mapper.mappings()[0], 0, "new_new_id", 0, "id", int_type, int_type); + expect_mapping(mapper.mappings()[1], 1, table_rowid.name, GLOBAL_ROWID_COLUMN_ID, + BeConsts::GLOBAL_ROWID_COL, str(), str()); +} + +TEST(ColumnMapperCreateMappingTest, ByFieldIdTreatsSameNameDifferentFieldIdAsMissing) { + const auto int_type = i32(); + const std::vector table_schema = { + field_id_col("same_name", 10, int_type), + }; + const std::vector file_schema = { + field_id_col("same_name", 20, int_type, 0), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + const auto status = mapper.create_mapping(table_schema, {}, file_schema); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_missing(mapper.mappings()[0]); +} + +TEST(ColumnMapperCreateMappingTest, NestedFieldIdTreatsSameNameDifferentFieldIdAsMissing) { + const auto int_type = i32(); + auto table_child = field_id_col("child", 10, int_type); + auto table_root = struct_col("root", 1, {table_child}); + + auto file_child = field_id_col("child", 20, int_type, 0); + auto file_root = struct_col("root", 1, {file_child}, 0); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + const auto status = mapper.create_mapping({table_root}, {}, {file_root}); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "root", 0, "root", file_root.type, table_root.type); + ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 1); + expect_missing(mapper.mappings()[0].child_mappings[0]); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexMapsTopLevelColumnsByPositionIgnoringFileNames) { + const auto int_type = i32(); + const auto string_type = str(); + const std::vector table_schema = { + position_col("user_id", 0, int_type), + position_col("user_name", 1, string_type), + position_col("age", 2, int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), + field_id_col("_col1", 101, string_type, 1), + field_id_col("_col2", 102, int_type, 2), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + expect_mapping(mapper.mappings()[0], 0, "user_id", 0, "_col0", int_type, int_type); + expect_mapping(mapper.mappings()[1], 1, "user_name", 1, "_col1", string_type, string_type); + expect_mapping(mapper.mappings()[2], 2, "age", 2, "_col2", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexSupportsSparseProjection) { + const auto int_type = i32(); + const std::vector table_schema = { + position_col("age", 2, int_type), + position_col("score", 4, int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), field_id_col("_col1", 101, int_type, 1), + field_id_col("_col2", 102, int_type, 2), field_id_col("_col3", 103, int_type, 3), + field_id_col("_col4", 104, int_type, 4), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 2); + expect_mapping(mapper.mappings()[0], 0, "age", 2, "_col2", int_type, int_type); + expect_mapping(mapper.mappings()[1], 1, "score", 4, "_col4", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, + ByIndexMatchesNestedStructChildrenByNameEvenWhenChildrenHaveFieldIds) { + const auto int_type = i32(); + const auto string_type = str(); + // Hive positional mapping only applies to top-level columns. FE/history schema metadata can + // still put field-id style integer identifiers on nested struct children. Those nested + // identifiers must not be interpreted as file positions. + auto table_root = struct_col("profile", 1, + { + field_id_col("id", 100, int_type), + field_id_col("name", 101, string_type), + }); + // Reverse the file child order so a wrong positional match either misses the child or reads + // the wrong physical child. The expected mapping below proves the children are matched by name. + auto file_root = struct_name_col("_col1", + { + name_col("name", string_type, 0), + name_col("id", int_type, 1), + }, + 1); + const std::vector table_schema = {table_root}; + const std::vector file_schema = { + field_id_col("_col0", 1000, string_type, 0), + file_root, + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + const auto status = mapper.create_mapping(table_schema, {}, file_schema); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "profile", 1, "_col1", file_root.type, table_root.type); + ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2); + expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "id", 1, "id", int_type, int_type); + expect_mapping(mapper.mappings()[0].child_mappings[1], 0, "name", 0, "name", string_type, + string_type); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexNestedStructDoesNotUseChildOrdinalIdentifier) { + const auto int_type = i32(); + const auto string_type = str(); + // This is the dangerous variant of the previous case: the nested integer identifiers happen + // to be valid child ordinals. BY_INDEX must still ignore them below the top-level root. + auto table_root = struct_col("profile", 1, + { + field_id_col("id", 0, int_type), + field_id_col("name", 1, string_type), + }); + // If the implementation uses child ordinal matching, id/name will be swapped here. + auto file_root = struct_name_col("_col1", + { + name_col("name", string_type, 0), + name_col("id", int_type, 1), + }, + 1); + const std::vector table_schema = {table_root}; + const std::vector file_schema = { + field_id_col("_col0", 1000, string_type, 0), + file_root, + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + const auto status = mapper.create_mapping(table_schema, {}, file_schema); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "profile", 1, "_col1", file_root.type, table_root.type); + ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2); + expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "id", 1, "id", int_type, int_type); + expect_mapping(mapper.mappings()[0].child_mappings[1], 0, "name", 0, "name", string_type, + string_type); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexArrayElementStructChildrenMatchByName) { + const auto int_type = i32(); + const auto string_type = str(); + // The top-level ARRAY column is selected by file position. After that, ARRAY has a single + // structural child, and the element STRUCT should use Hive's nested-by-name behavior. + auto table_element = struct_col("element", 10, + { + field_id_col("id", 100, int_type), + field_id_col("name", 101, string_type), + }); + auto table_root = array_col("profiles", 1, table_element); + // Reverse the element struct children to distinguish name matching from position matching. + auto file_element = struct_name_col("element", + { + name_col("name", string_type, 0), + name_col("id", int_type, 1), + }, + 0); + auto file_root = array_col("_col1", 1001, file_element, 1); + const std::vector table_schema = {table_root}; + const std::vector file_schema = { + field_id_col("_col0", 1000, string_type, 0), + file_root, + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + const auto status = mapper.create_mapping(table_schema, {}, file_schema); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "profiles", 1, "_col1", file_root.type, + table_root.type); + ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 1); + const auto& element_mapping = mapper.mappings()[0].child_mappings[0]; + expect_mapping(element_mapping, 0, "element", 0, "element", file_element.type, + table_element.type); + ASSERT_EQ(element_mapping.child_mappings.size(), 2); + expect_mapping(element_mapping.child_mappings[0], 0, "id", 1, "id", int_type, int_type); + expect_mapping(element_mapping.child_mappings[1], 0, "name", 0, "name", string_type, + string_type); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexMapValueStructChildrenMatchByName) { + const auto int_type = i32(); + const auto string_type = str(); + const auto key_type = str(); + // MAP key/value are structural children, so BY_INDEX should not reinterpret their nested + // integer identifiers as arbitrary positions. The value STRUCT then follows name matching. + auto table_key = field_id_col("key", 10, key_type); + auto table_value = struct_col("value", 11, + { + field_id_col("id", 100, int_type), + field_id_col("name", 101, string_type), + }); + auto table_root = map_col("profiles", 1, {table_key, table_value}, key_type, table_value.type); + auto file_key = name_col("key", key_type, 0); + // Reverse value struct children. A positional nested match would produce name/id swapped. + auto file_value = struct_name_col("value", + { + name_col("name", string_type, 0), + name_col("id", int_type, 1), + }, + 1); + auto file_root = map_col("_col1", 1001, {file_key, file_value}, key_type, file_value.type, 1); + const std::vector table_schema = {table_root}; + const std::vector file_schema = { + field_id_col("_col0", 1000, string_type, 0), + file_root, + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + const auto status = mapper.create_mapping(table_schema, {}, file_schema); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "profiles", 1, "_col1", file_root.type, + table_root.type); + ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2); + expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "key", 0, "key", key_type, key_type); + const auto& value_mapping = mapper.mappings()[0].child_mappings[1]; + expect_mapping(value_mapping, 0, "value", 1, "value", file_value.type, table_value.type); + ASSERT_EQ(value_mapping.child_mappings.size(), 2); + expect_mapping(value_mapping.child_mappings[0], 0, "id", 1, "id", int_type, int_type); + expect_mapping(value_mapping.child_mappings[1], 0, "name", 0, "name", string_type, string_type); +} + +TEST(ColumnMapperCreateMappingTest, + ByIndexPartitionColumnsTakeConstantAndDoNotConsumeFilePosition) { + const auto int_type = i32(); + const auto string_type = str(); + auto partition = name_col("dt", string_type); + partition.is_partition_key = true; + const std::vector table_schema = { + partition, + position_col("user_id", 0, int_type), + position_col("score", 1, int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), + field_id_col("_col1", 101, int_type, 1), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, + {{"dt", Field::create_field("2026-06-11")}}, + file_schema) + .ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + expect_constant(mapper, mapper.mappings()[0], 0, string_type); + expect_mapping(mapper.mappings()[1], 1, "user_id", 0, "_col0", int_type, int_type); + expect_mapping(mapper.mappings()[2], 2, "score", 1, "_col1", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexOutOfRangeFallsBackToDefaultOrMissing) { + const auto int_type = i32(); + auto with_default = position_col("extra_default", 5, int_type); + const auto literal_expr = + VExprContext::create_shared(literal(int_type, Field::create_field(42))); + with_default.default_expr = literal_expr; + const std::vector table_schema = { + position_col("a", 0, int_type), + with_default, + position_col("extra_missing", 99, int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), + field_id_col("_col1", 101, int_type, 1), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type); + expect_constant(mapper, mapper.mappings()[1], 1, int_type); + EXPECT_EQ(mapper.mappings()[1].default_expr, literal_expr); + expect_missing(mapper.mappings()[2]); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexMissingIdentifierFallsBackToDefaultOrMissing) { + const auto int_type = i32(); + auto with_default = name_col("extra_default", int_type); + const auto literal_expr = + VExprContext::create_shared(literal(int_type, Field::create_field(42))); + with_default.default_expr = literal_expr; + const std::vector table_schema = { + position_col("a", 0, int_type), + with_default, + name_col("extra_missing", int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type); + expect_constant(mapper, mapper.mappings()[1], 1, int_type); + EXPECT_EQ(mapper.mappings()[1].default_expr, literal_expr); + expect_missing(mapper.mappings()[2]); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexOutOfRangeFallsBackToMissing) { + const auto int_type = i32(); + const std::vector table_schema = { + position_col("a", 0, int_type), + position_col("b", 5, int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + const auto status = mapper.create_mapping(table_schema, {}, file_schema); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 2); + expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type); + expect_missing(mapper.mappings()[1]); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexIgnoresExtraFileColumns) { + const auto int_type = i32(); + const std::vector table_schema = { + position_col("a", 0, int_type), + }; + const std::vector file_schema = { + field_id_col("_col0", 100, int_type, 0), + field_id_col("_col1", 101, int_type, 1), + field_id_col("_col2", 102, int_type, 2), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, ByIndexIgnoresFileColumnNames) { + const auto int_type = i32(); + const std::vector table_schema = { + position_col("a", 1, int_type), + }; + const std::vector file_schema = { + field_id_col("a", 100, int_type, 10), + field_id_col("b", 101, int_type, 20), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_mapping(mapper.mappings()[0], 0, "a", 20, "b", int_type, int_type); +} + +TEST(ColumnMapperCreateMappingTest, MissingColumnFallsBackToMissingMapping) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + const auto status = mapper.create_mapping({name_col("missing", i32())}, {}, + {name_col("present", i32(), 0)}); + ASSERT_TRUE(status.ok()) << status.to_string(); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_missing(mapper.mappings()[0]); +} + +// ---------------------------------------------------------------------- +// L1 constants and virtual columns. +// These tests verify non-file-backed mappings before TableReader materializes +// their final values. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperConstantTest, PartitionDefaultAndVirtualColumnsUseDedicatedBranches) { + auto partition_column = name_col("dt", str()); + partition_column.is_partition_key = true; + + auto default_column = name_col("new_value", i32()); + default_column.default_expr = + VExprContext::create_shared(literal(i32(), Field::create_field(42))); + + auto row_id_column = name_col("_row_id", make_nullable(i64())); + auto sequence_column = name_col("_last_updated_sequence_number", make_nullable(i64())); + auto iceberg_rowid_column = name_col(BeConsts::ICEBERG_ROWID_COL, str()); + + const std::vector table_schema = { + partition_column, default_column, row_id_column, sequence_column, iceberg_rowid_column}; + const std::map partition_values = { + {"dt", Field::create_field("2026-06-11")}, + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, partition_values, {}).ok()); + + ASSERT_EQ(mapper.mappings().size(), 5); + expect_constant(mapper, mapper.mappings()[0], 0, str()); + expect_constant(mapper, mapper.mappings()[1], 1, i32()); + EXPECT_EQ(mapper.mappings()[2].virtual_column_type, TableVirtualColumnType::ROW_ID); + EXPECT_EQ(mapper.mappings()[3].virtual_column_type, + TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER); + EXPECT_EQ(mapper.mappings()[4].virtual_column_type, TableVirtualColumnType::ICEBERG_ROWID); +} + +TEST(ColumnMapperConstantTest, PhysicalRowLineageFiltersStayFinalizeOnly) { + auto row_id_column = name_col("_row_id", make_nullable(i64())); + auto sequence_column = name_col("_last_updated_sequence_number", make_nullable(i64())); + const std::vector table_schema = {row_id_column, sequence_column}; + const std::vector file_schema = { + name_col("_row_id", make_nullable(i64()), 2147483540), + name_col("_last_updated_sequence_number", make_nullable(i64()), 2147483539), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 2); + EXPECT_EQ(mapper.mappings()[0].virtual_column_type, TableVirtualColumnType::ROW_ID); + EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::FINALIZE_ONLY); + EXPECT_EQ(mapper.mappings()[1].virtual_column_type, + TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER); + EXPECT_EQ(mapper.mappings()[1].filter_conversion, FilterConversionType::FINALIZE_ONLY); + + auto row_id_filter = + binary_predicate(TExprOpcode::EQ, table_slot(0, 0, make_nullable(i64()), "_row_id"), + literal(i64(), Field::create_field(1001))); + auto sequence_filter = binary_predicate( + TExprOpcode::EQ, + table_slot(1, 1, make_nullable(i64()), "_last_updated_sequence_number"), + literal(i64(), Field::create_field(77))); + TableFilter row_id_table_filter {.conjunct = VExprContext::create_shared(row_id_filter), + .global_indices = {GlobalIndex(0)}}; + TableFilter sequence_table_filter {.conjunct = VExprContext::create_shared(sequence_filter), + .global_indices = {GlobalIndex(1)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({row_id_table_filter, sequence_table_filter}, {}, + table_schema, &request) + .ok()); + + EXPECT_TRUE(request.conjuncts.empty()); + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_EQ(projection_ids(request.non_predicate_columns), + std::vector({2147483540, 2147483539})); +} + +TEST(ColumnMapperConstantTest, MissingRowLineageDefaultExprStillUsesVirtualMapping) { + auto id_column = field_id_col("id", 1, make_nullable(i32())); + auto row_id_column = field_id_col("renamed_row_id", 2147483540, make_nullable(i64())); + row_id_column.default_expr = VExprContext::create_shared( + literal(make_nullable(i64()), Field::create_field(0))); + auto sequence_column = + field_id_col("renamed_last_updated_sequence_number", 2147483539, make_nullable(i64())); + sequence_column.default_expr = VExprContext::create_shared( + literal(make_nullable(i64()), Field::create_field(0))); + + const std::vector table_schema = {id_column, row_id_column, sequence_column}; + const std::vector file_schema = { + field_id_col("id", 1, make_nullable(i32()), 0), + field_id_col("name", 2, make_nullable(str()), 1), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 3); + expect_mapping(mapper.mappings()[0], 0, "id", 0, "id", make_nullable(i32()), + make_nullable(i32())); + EXPECT_EQ(mapper.mappings()[1].virtual_column_type, TableVirtualColumnType::ROW_ID); + EXPECT_FALSE(mapper.mappings()[1].constant_index.has_value()); + EXPECT_EQ(mapper.mappings()[2].virtual_column_type, + TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER); + EXPECT_FALSE(mapper.mappings()[2].constant_index.has_value()); + EXPECT_TRUE(mapper.constant_map().empty()); +} + +TEST(ColumnMapperConstantTest, ByFieldIdDoesNotTreatSameNameDifferentIdAsRowLineage) { + const std::vector table_schema = { + field_id_col("_row_id", 100, make_nullable(i64())), + field_id_col("_last_updated_sequence_number", 101, make_nullable(i64())), + }; + const std::vector file_schema = { + field_id_col("_row_id", 100, make_nullable(i64()), 0), + field_id_col("_last_updated_sequence_number", 101, make_nullable(i64()), 1), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + ASSERT_EQ(mapper.mappings().size(), 2); + expect_mapping(mapper.mappings()[0], 0, "_row_id", 0, "_row_id", make_nullable(i64()), + make_nullable(i64())); + EXPECT_EQ(mapper.mappings()[0].virtual_column_type, TableVirtualColumnType::INVALID); + EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::COPY_DIRECTLY); + expect_mapping(mapper.mappings()[1], 1, "_last_updated_sequence_number", 1, + "_last_updated_sequence_number", make_nullable(i64()), make_nullable(i64())); + EXPECT_EQ(mapper.mappings()[1].virtual_column_type, TableVirtualColumnType::INVALID); + EXPECT_EQ(mapper.mappings()[1].filter_conversion, FilterConversionType::COPY_DIRECTLY); +} + +TEST(ColumnMapperConstantTest, PartitionAliasResolvesRenamedValue) { + auto partition_column = name_col("current_dt", str()); + partition_column.name_mapping = {"legacy_dt"}; + partition_column.is_partition_key = true; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping( + {partition_column}, + {{"legacy_dt", Field::create_field("2026-06-11")}}, {}) + .ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + expect_constant(mapper, mapper.mappings()[0], 0, str()); +} + +TEST(ColumnMapperConstantTest, PartitionConstantFilterEntryDoesNotReadFileColumns) { + auto partition_column = name_col("part", i32()); + partition_column.is_partition_key = true; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({partition_column}, + {{"part", Field::create_field(7)}}, {}) + .ok()); + + TableFilter filter { + .conjunct = VExprContext::create_shared(int_gt(table_slot(0, 0, i32(), "part"), 1)), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {partition_column}, &request).ok()); + + ASSERT_EQ(mapper.filter_entries().size(), 1); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).constant_index(), + *mapper.mappings()[0].constant_index); + EXPECT_TRUE(request.local_positions.empty()); + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_TRUE(request.non_predicate_columns.empty()); + EXPECT_TRUE(request.conjuncts.empty()); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +TEST(ColumnMapperConstantTest, DefaultConstantFilterEntryUsesDefaultExpression) { + auto default_column = name_col("new_value", i32()); + default_column.default_expr = + VExprContext::create_shared(literal(i32(), Field::create_field(42))); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({default_column}, {}, {}).ok()); + + TableFilter filter {.conjunct = VExprContext::create_shared( + int_gt(table_slot(0, 0, i32(), "new_value"), 1)), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {default_column}, &request).ok()); + + ASSERT_EQ(mapper.filter_entries().size(), 1); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant()); + const auto constant_index = mapper.filter_entries().at(GlobalIndex(0)).constant_index(); + EXPECT_EQ(constant_index, *mapper.mappings()[0].constant_index); + EXPECT_EQ(mapper.constant_map().get(constant_index).expr, default_column.default_expr); + EXPECT_TRUE(request.local_positions.empty()); + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_TRUE(request.non_predicate_columns.empty()); + EXPECT_TRUE(request.conjuncts.empty()); +} + +TEST(ColumnMapperConstantTest, MixedConstantAndFileFilterKeepsOnlyFileScanColumn) { + auto partition_column = name_col("part", i32()); + partition_column.is_partition_key = true; + const auto file_column = name_col("score", i32(), 3); + const std::vector table_schema = {partition_column, file_column}; + const std::vector file_schema = {file_column}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {{"part", Field::create_field(7)}}, + file_schema) + .ok()); + + TableFilter constant_filter { + .conjunct = VExprContext::create_shared(int_gt(table_slot(0, 0, i32(), "part"), 1)), + .global_indices = {GlobalIndex(0)}}; + TableFilter file_filter { + .conjunct = VExprContext::create_shared(int_gt(table_slot(1, 1, i32(), "score"), 10)), + .global_indices = {GlobalIndex(1)}}; + + FileScanRequest request; + ASSERT_TRUE( + mapper.create_scan_request({constant_filter, file_filter}, {}, table_schema, &request) + .ok()); + + ASSERT_EQ(mapper.filter_entries().size(), 2); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant()); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(1)).is_local()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(1)).local_index(), LocalIndex(0)); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(3)); + EXPECT_TRUE(request.non_predicate_columns.empty()); +} + +// ---------------------------------------------------------------------- +// L1 direct filter localization tests. +// These tests call localize_filters() directly to pin the core interface +// contract apart from create_scan_request() initialization. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperLocalizeFiltersTest, VisibleLocalFilterAddsPredicateColumnAndConjunct) { + const auto int_type = i32(); + const std::vector table_schema = {name_col("id", int_type)}; + const std::vector file_schema = {name_col("id", int_type, 7)}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(11, 0, int_type, "id")), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok()); + + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(7)); + ASSERT_EQ(request.local_positions.size(), 1); + EXPECT_EQ(request.local_positions.at(LocalColumnId(7)), LocalIndex(0)); + ASSERT_EQ(mapper.filter_entries().size(), 1); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0)); + + ASSERT_EQ(request.conjuncts.size(), 1); + const auto* localized_slot = assert_cast(request.conjuncts[0]->root().get()); + EXPECT_EQ(localized_slot->slot_id(), 11); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_EQ(localized_slot->column_name(), "id"); + EXPECT_TRUE(localized_slot->data_type()->equals(*int_type)); +} + +TEST(ColumnMapperLocalizeFiltersTest, ConstantFilterBuildsEntryWithoutFileScanColumn) { + auto partition_column = name_col("part", i32()); + partition_column.is_partition_key = true; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({partition_column}, + {{"part", Field::create_field(7)}}, {}) + .ok()); + + TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(3, 0, i32(), "part")), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok()); + + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_TRUE(request.non_predicate_columns.empty()); + EXPECT_TRUE(request.local_positions.empty()); + EXPECT_TRUE(request.conjuncts.empty()); + ASSERT_EQ(mapper.filter_entries().size(), 1); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).constant_index(), + mapper.mappings()[0].constant_index); +} + +TEST(ColumnMapperLocalizeFiltersTest, ColumnPredicatesUseOnlyExistingLocalPositions) { + const auto int_type = i32(); + const std::vector table_schema = {name_col("id", int_type)}; + const std::vector file_schema = {name_col("id", int_type, 3)}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + TableColumnPredicates predicates; + predicates[GlobalIndex(0)] = {create_comparison_predicate( + 0, "id", int_type, Field::create_field(10), false)}; + + FileScanRequest request_without_local_position; + ASSERT_TRUE(mapper.localize_filters({}, predicates, &request_without_local_position).ok()); + EXPECT_TRUE(request_without_local_position.column_predicate_filters.empty()); + ASSERT_EQ(mapper.filter_entries().size(), 1); + EXPECT_FALSE(mapper.filter_entries().at(GlobalIndex(0)).is_local()); + + FileScanRequest request_with_local_position; + request_with_local_position.non_predicate_columns.push_back( + LocalColumnIndex::top_level(LocalColumnId(3))); + request_with_local_position.local_positions.emplace(LocalColumnId(3), LocalIndex(0)); + ASSERT_TRUE(mapper.localize_filters({}, predicates, &request_with_local_position).ok()); + + ASSERT_EQ(request_with_local_position.non_predicate_columns.size(), 1); + EXPECT_EQ(request_with_local_position.non_predicate_columns[0].column_id(), LocalColumnId(3)); + EXPECT_TRUE(request_with_local_position.predicate_columns.empty()); + ASSERT_EQ(request_with_local_position.column_predicate_filters.size(), 1); + EXPECT_EQ(request_with_local_position.column_predicate_filters[0].effective_file_column_id(), + LocalColumnId(3)); + ASSERT_EQ(request_with_local_position.column_predicate_filters[0].predicates.size(), 1); + EXPECT_EQ(request_with_local_position.column_predicate_filters[0].predicates[0]->type(), + PredicateType::GT); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0)); +} + +TEST(ColumnMapperLocalizeFiltersTest, NestedFilterOnlyChildMergesIntoPredicateProjection) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_a = name_col("a", int_type); + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + auto full_table_struct = struct_name_col("s", {table_a, table_b}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + auto filter_expr = int_gt( + struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"), 10); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok()); + + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5)); + ASSERT_FALSE(request.predicate_columns[0].project_all_children); + EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector({0, 1})); + ASSERT_EQ(request.local_positions.size(), 1); + EXPECT_EQ(request.local_positions.at(LocalColumnId(5)), LocalIndex(0)); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0)); + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({0})); + EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()), + std::vector({"a"})); +} + +TEST(ColumnMapperLocalizeFiltersTest, PreservesExistingScanStateWhenAddingPredicateColumn) { + const auto int_type = i32(); + const std::vector table_schema = { + name_col("id", int_type), + name_col("score", int_type), + }; + const std::vector file_schema = { + name_col("id", int_type, 3), + name_col("score", int_type, 4), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(2, 0, int_type, "id")), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + request.non_predicate_columns.push_back(LocalColumnIndex::top_level(LocalColumnId(4))); + request.local_positions.emplace(LocalColumnId(4), LocalIndex(0)); + ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(4)); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(3)); + ASSERT_EQ(request.local_positions.size(), 2); + EXPECT_EQ(request.local_positions.at(LocalColumnId(4)), LocalIndex(0)); + EXPECT_EQ(request.local_positions.at(LocalColumnId(3)), LocalIndex(1)); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(1)); +} + +// ---------------------------------------------------------------------- +// L1 scan request and filter localization tests. +// These tests assert predicate/non-predicate split, local positions, hidden +// filter mappings, and nested predicate targets. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperScanRequestTest, ColumnPredicatesDoNotForceRowPredicateMaterialization) { + const auto int_type = i32(); + const auto string_type = str(); + const std::vector table_schema = { + name_col("id", int_type), + name_col("name", string_type), + }; + const std::vector file_schema = { + name_col("id", int_type, 0), + name_col("name", string_type, 1), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + TableColumnPredicates predicates; + predicates[GlobalIndex(0)] = {create_comparison_predicate( + 0, "id", int_type, Field::create_field(10), false)}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, predicates, table_schema, &request).ok()); + + EXPECT_TRUE(request.predicate_columns.empty()); + EXPECT_EQ(projection_ids(request.non_predicate_columns), std::vector({0, 1})); + ASSERT_EQ(request.local_positions.size(), 2); + EXPECT_EQ(request.local_positions.at(LocalColumnId(0)), LocalIndex(0)); + EXPECT_EQ(request.local_positions.at(LocalColumnId(1)), LocalIndex(1)); + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(0)); +} + +TEST(ColumnMapperScanRequestTest, HiddenTopLevelFilterMappingUsesNameFallback) { + const auto int_type = i32(); + const std::vector table_schema = { + field_id_col("id", 1, int_type), + }; + const std::vector file_schema = { + field_id_col("id", 1, int_type, 0), + field_id_col("score", 2, int_type, 1), + }; + + auto filter_expr = int_gt(table_slot(7, 1, int_type, "score"), 10); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(1)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, table_schema, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(0)); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1)); + ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(1)).is_local()); + EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(1)).local_index(), LocalIndex(1)); +} + +TEST(ColumnMapperScanRequestTest, StructOutputAndFilterOnlyChildAreMerged) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_a = name_col("a", int_type); + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + auto full_table_struct = struct_name_col("s", {table_a, table_b}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + auto filter_expr = int_gt( + struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"), 10); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5)); + ASSERT_FALSE(request.predicate_columns[0].project_all_children); + EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector({0, 1})); + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({0})); + EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()), + std::vector({"a"})); +} + +TEST(ColumnMapperScanRequestTest, RenamedNestedPredicateTargetsMappedFileChild) { + const auto int_type = i32(); + + auto table_a = field_id_col("a", 1, int_type); + auto table_renamed_b = field_id_col("renamed_b", 2, int_type); + auto table_struct = struct_col("s", 10, {table_a, table_renamed_b}); + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, int_type, 1); + auto file_struct = struct_col("s", 10, {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + auto filter_expr = int_gt( + struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "renamed_b"), 10); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({1})); + EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()), + std::vector({"b"})); +} + +TEST(ColumnMapperScanRequestTest, NestedInNullAndReverseComparisonFiltersAreMerged) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_a = name_col("a", int_type); + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + auto full_table_struct = struct_name_col("s", {table_a, table_b}); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + const auto nested_a = + struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"); + auto in_filter = + in_predicate(nested_a, int_type, + {Field::create_field(5), Field::create_field(7)}); + auto reverse_filter = binary_predicate( + TExprOpcode::LT, literal(int_type, Field::create_field(3)), nested_a); + auto null_filter = null_predicate(nested_a, true); + auto not_null_filter = null_predicate(nested_a, false); + auto filter_expr = compound_predicate( + TExprOpcode::COMPOUND_AND, + compound_predicate(TExprOpcode::COMPOUND_AND, in_filter, reverse_filter), + compound_predicate(TExprOpcode::COMPOUND_AND, null_filter, not_null_filter)); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({0})); + EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()), + std::vector({"a"})); + ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 4); + EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::IN_LIST); + EXPECT_EQ(request.column_predicate_filters[0].predicates[1]->type(), PredicateType::GT); + EXPECT_EQ(request.column_predicate_filters[0].predicates[2]->type(), PredicateType::IS_NULL); + EXPECT_EQ(request.column_predicate_filters[0].predicates[3]->type(), + PredicateType::IS_NOT_NULL); +} + +TEST(ColumnMapperScanRequestTest, NestedPredicateFilterThroughSafeCast) { + const auto file_int_type = i32(); + const auto table_bigint_type = i64(); + const auto string_type = str(); + + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + auto full_table_struct = std::make_shared( + DataTypes {table_bigint_type, string_type}, Strings {"a", "b"}); + + auto file_a = name_col("a", file_int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + const auto nested_a = + struct_element(table_slot(0, 0, full_table_struct, "s"), file_int_type, "a"); + auto filter_expr = + binary_predicate(TExprOpcode::GT, cast_expr(nested_a, table_bigint_type), + literal(table_bigint_type, Field::create_field(5))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({0})); + ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::GT); +} + +TEST(ColumnMapperScanRequestTest, UnsafeCastDoesNotBuildNestedPredicateFilter) { + const auto file_bigint_type = i64(); + const auto table_int_type = i32(); + const auto string_type = str(); + + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + auto full_table_struct = std::make_shared( + DataTypes {table_int_type, string_type}, Strings {"a", "b"}); + + auto file_a = name_col("a", file_bigint_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + const auto nested_a = + struct_element(table_slot(0, 0, full_table_struct, "s"), file_bigint_type, "a"); + auto filter_expr = binary_predicate(TExprOpcode::GT, cast_expr(nested_a, table_int_type), + literal(table_int_type, Field::create_field(5))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + EXPECT_TRUE(request.column_predicate_filters.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5)); + EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector({0, 1})); +} + +TEST(ColumnMapperScanRequestTest, DeepNestedPredicateTargetsLeafPath) { + const auto id_type = i32(); + const auto name_type = str(); + const auto string_type = str(); + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + + auto full_table_inner_type = + std::make_shared(DataTypes {id_type, name_type}, Strings {"id", "n"}); + auto full_table_struct_type = std::make_shared( + DataTypes {full_table_inner_type, string_type}, Strings {"a", "b"}); + + auto file_id = name_col("id", id_type, 0); + auto file_name = name_col("n", name_type, 1); + auto file_a = struct_name_col("a", {file_id, file_name}, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + const auto nested_id = + struct_element(struct_element(table_slot(0, 0, full_table_struct_type, "s"), + full_table_inner_type, "a"), + id_type, "id"); + auto filter_expr = + in_predicate(nested_id, id_type, + {Field::create_field(5), Field::create_field(7)}); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({0, 0})); + EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()), + std::vector({"a", "id"})); + ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::IN_LIST); +} + +TEST(ColumnMapperScanRequestTest, ArrayStructProjectionPrunesElementChildren) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_b = name_col("b", string_type); + auto table_element = struct_name_col("element", {table_b}); + auto table_array = array_col("items", -1, table_element); + table_array.identifier = Field::create_field("items"); + set_name_identifiers(&table_array, -1); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_element = struct_name_col("element", {file_a, file_b}, 0); + auto file_array = array_col("items", -1, file_element, 4); + file_array.identifier = Field::create_field("items"); + set_name_identifiers(&file_array, 4); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(4)); + ASSERT_FALSE(projection.project_all_children); + ASSERT_EQ(projection.children.size(), 1); + EXPECT_EQ(projection.children[0].local_id(), 0); + ASSERT_EQ(projection.children[0].children.size(), 1); + EXPECT_EQ(projection.children[0].children[0].local_id(), 1); + + const auto* mapped_array = assert_cast( + remove_nullable(mapper.mappings()[0].file_type).get()); + const auto* mapped_element = assert_cast( + remove_nullable(mapped_array->get_nested_type()).get()); + ASSERT_EQ(mapped_element->get_elements().size(), 1); + EXPECT_EQ(mapped_element->get_element_name(0), "b"); +} + +TEST(ColumnMapperScanRequestTest, MapValueStructProjectionPrunesValueChildren) { + const auto key_type = str(); + const auto int_type = i32(); + const auto string_type = str(); + + auto table_value_b = name_col("b", string_type); + auto table_value = struct_name_col("value", {table_value_b}); + auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type); + table_map.identifier = Field::create_field("m"); + set_name_identifiers(&table_map, -1); + + auto file_key = name_col("key", key_type, 0); + auto file_value_a = name_col("a", int_type, 0); + auto file_value_b = name_col("b", string_type, 1); + auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1); + auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 6); + file_map.identifier = Field::create_field("m"); + set_name_identifiers(&file_map, 6); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_map}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(6)); + ASSERT_FALSE(projection.project_all_children); + ASSERT_EQ(projection.children.size(), 1); + EXPECT_EQ(projection.children[0].local_id(), 1); + ASSERT_EQ(projection.children[0].children.size(), 1); + EXPECT_EQ(projection.children[0].children[0].local_id(), 1); + + const auto* mapped_map = + assert_cast(remove_nullable(mapper.mappings()[0].file_type).get()); + const auto* mapped_value = + assert_cast(remove_nullable(mapped_map->get_value_type()).get()); + ASSERT_EQ(mapped_value->get_elements().size(), 1); + EXPECT_EQ(mapped_value->get_element_name(0), "b"); +} + +// Scenario: a table struct projects only child `b`, while the file struct stores `a,b`. +// BY_NAME mapping should read only the physical child `b` and rebuild the mapped file type to the +// projected struct shape. +TEST(ColumnMapperScanRequestTest, StructProjectionPrunesChildrenByName) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_b = name_col("b", string_type); + auto table_struct = struct_name_col("s", {table_b}); + set_name_identifiers(&table_struct, 0); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_struct = struct_name_col("s", {file_a, file_b}, 0); + set_name_identifiers(&file_struct, 0); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(0)); + ASSERT_FALSE(projection.project_all_children); + ASSERT_EQ(projection.children.size(), 1); + EXPECT_EQ(projection.children[0].local_id(), 1); + + ASSERT_EQ(mapper.mappings().size(), 1); + const auto* projected_type = assert_cast( + remove_nullable(mapper.mappings()[0].file_type).get()); + ASSERT_EQ(projected_type->get_elements().size(), 1); + EXPECT_EQ(projected_type->get_element_name(0), "b"); +} + +// Scenario: a row filter reaches a struct child through an array wrapper +// (`items.item.a > 5`). The nested predicate filter path only supports direct struct paths, so +// the mapper keeps this as a row predicate and reads the full array root for predicate evaluation. +TEST(ColumnMapperScanRequestTest, ArrayWrapperDoesNotBuildNestedPredicateFilter) { + const auto int_type = i32(); + const auto string_type = str(); + + auto file_a = name_col("a", int_type, 0); + auto file_b = name_col("b", string_type, 1); + auto file_element = struct_name_col("item", {file_a, file_b}, 0); + auto file_array = array_col("items", -1, file_element, 0); + set_name_identifiers(&file_array, 0); + + auto table_array = file_array; + + const auto item_type = file_element.type; + auto item_expr = struct_element(table_slot(0, 0, table_array.type, "items"), item_type, "item"); + auto filter_expr = int_gt(struct_element(item_expr, int_type, "a"), 5); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_array}, &request).ok()); + + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(0)); + EXPECT_TRUE(request.predicate_columns[0].project_all_children); + EXPECT_TRUE(request.predicate_columns[0].children.empty()); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// Scenario: a map value struct projects child `b`, while a row filter reads value child `a`. +// The filter is too complex to become a file-local nested predicate, but the predicate projection +// must replace the output projection for the same map root and contain both physical value children. +TEST(ColumnMapperScanRequestTest, MapFilterOnlyValueChildMergesWithOutputProjection) { + const auto key_type = i32(); + const auto int_type = i32(); + const auto string_type = str(); + + auto table_value_b = name_col("b", string_type); + auto table_value = struct_name_col("value", {table_value_b}); + auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type); + set_name_identifiers(&table_map, 0); + + auto file_key = name_col("key", key_type, 0); + auto file_value_a = name_col("a", int_type, 0); + auto file_value_b = name_col("b", string_type, 1); + auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1); + auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 0); + set_name_identifiers(&file_map, 0); + + auto full_value_type = + std::make_shared(DataTypes {int_type, string_type}, Strings {"a", "b"}); + auto full_map_type = std::make_shared(key_type, full_value_type); + auto value_expr = + struct_element(table_slot(0, 0, full_map_type, "m"), full_value_type, "value"); + auto filter_expr = int_gt(struct_element(value_expr, int_type, "a"), 5); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok()); + + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + const auto& projection = request.predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(0)); + ASSERT_FALSE(projection.project_all_children); + ASSERT_EQ(projection.children.size(), 1); + EXPECT_EQ(projection.children[0].local_id(), 1); + EXPECT_EQ(projection_ids(projection.children[0].children), std::vector({0, 1})); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// Scenario: when projected struct children are an in-order prefix of the file struct, the mapper can +// read those physical children directly without rebuilding the file-side complex type. +TEST(ColumnMapperScanRequestTest, MatchingProjectedStructDoesNotNeedComplexRematerialize) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_a = field_id_col("a", 1, int_type); + auto table_b = field_id_col("b", 2, string_type); + auto table_struct = struct_col("s", 10, {table_a, table_b}); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, string_type, 1); + auto file_c = field_id_col("c", 3, int_type, 2); + auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_TRUE(mapper.mappings()[0].is_trivial); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_FALSE(projection.project_all_children); + EXPECT_EQ(projection_ids(projection.children), std::vector({0, 1})); + EXPECT_TRUE(mapper.mappings()[0].is_trivial); +} + +// Scenario: Iceberg field-id mapping sees a renamed struct child, but the physical child order and +// types still match, so projection remains a full physical read instead of rebuilding a new type. +TEST(ColumnMapperScanRequestTest, RenameOnlyProjectedStructDoesNotRebuildFileProjection) { + const auto int_type = i32(); + + auto table_a = field_id_col("a", 1, int_type); + auto table_renamed_b = field_id_col("renamed_b", 2, int_type); + auto table_struct = struct_col("s", 10, {table_a, table_renamed_b}); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, int_type, 1); + auto file_struct = struct_col("s", 10, {file_a, file_b}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_TRUE(mapper.mappings()[0].is_trivial); + EXPECT_EQ(mapper.mappings()[0].projected_file_children.size(), + mapper.mappings()[0].original_file_children.size()); + ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2); + EXPECT_EQ(mapper.mappings()[0].child_mappings[1].table_column_name, "renamed_b"); + EXPECT_EQ(mapper.mappings()[0].child_mappings[1].file_column_name, "b"); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_TRUE(request.non_predicate_columns[0].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[0].children.empty()); + EXPECT_TRUE(mapper.mappings()[0].is_trivial); +} + +// Scenario: a row filter references an unprojected struct child, so the predicate projection is +// merged with the output projection and the mapper rebuilds the projected file struct type. +TEST(ColumnMapperScanRequestTest, PredicateProjectionRebuildsProjectedStructFileType) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_a = field_id_col("a", 1, int_type); + auto table_b = field_id_col("b", 2, string_type); + auto table_struct = struct_col("s", 10, {table_a, table_b}); + auto full_table_c = field_id_col("c", 3, int_type); + auto full_table_struct = struct_col("s", 10, {table_a, table_b, full_table_c}); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, string_type, 1); + auto file_c = field_id_col("c", 3, int_type, 2); + auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + auto filter_expr = + int_gt(struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "c"), 0); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_TRUE(request.non_predicate_columns.empty()); + const auto& projection = request.predicate_columns[0]; + EXPECT_FALSE(projection.project_all_children); + EXPECT_EQ(projection_ids(projection.children), std::vector({0, 1, 2})); + + const auto* mapped_type = assert_cast( + remove_nullable(mapper.mappings()[0].file_type).get()); + ASSERT_EQ(mapped_type->get_elements().size(), 3); + EXPECT_EQ(mapped_type->get_element_name(0), "a"); + EXPECT_EQ(mapped_type->get_element_name(1), "b"); + EXPECT_EQ(mapped_type->get_element_name(2), "c"); + EXPECT_FALSE(mapper.mappings()[0].is_trivial); +} + +// Scenario: a filter references a top-level column that is not projected by the query; the mapper +// creates a hidden filter mapping without adding that hidden column to visible table mappings. +TEST(ColumnMapperScanRequestTest, PredicateOnlyTopLevelColumnUsesHiddenMapping) { + const auto int_type = i32(); + + auto table_id = field_id_col("id", 0, int_type); + auto table_c = field_id_col("c", 11, int_type); + auto table_struct = struct_col("s", 10, {table_c}); + + auto file_id = field_id_col("id", 0, int_type, 0); + auto file_c = field_id_col("c", 11, int_type, 0); + auto file_struct = struct_col("s", 10, {file_c}, 10); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_id}, {}, {file_id, file_struct}).ok()); + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_EQ(mapper.mappings()[0].table_column_name, "id"); + + auto filter_expr = + int_gt(struct_element(table_slot(7, 1, table_struct.type, "s"), int_type, "c"), 0); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(1)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_id}, &request).ok()); + + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_EQ(mapper.mappings()[0].table_column_name, "id"); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(0)); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(10)); + EXPECT_TRUE(request.predicate_columns[0].project_all_children); + EXPECT_TRUE(request.predicate_columns[0].children.empty()); + + ASSERT_EQ(request.conjuncts.size(), 1); + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(10)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({0})); +} + +// Scenario: a nested predicate targets a table-side renamed struct field; both predicate pruning and +// scan projection must resolve that field to the old physical file child. +TEST(ColumnMapperScanRequestTest, NestedPredicateProjectionUsesMappedRenamedChild) { + const auto int_type = i32(); + + auto table_a = field_id_col("a", 1, int_type); + auto table_renamed_b = field_id_col("renamed_b", 2, int_type); + auto table_struct = struct_col("s", 10, {table_a, table_renamed_b}); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, int_type, 1); + auto file_struct = struct_col("s", 10, {file_a, file_b}, 10); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + auto filter_expr = int_gt( + struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "renamed_b"), 0); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.column_predicate_filters.size(), 1); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(10)); + EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(), + std::vector({1})); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_TRUE(request.predicate_columns[0].project_all_children); + EXPECT_TRUE(request.predicate_columns[0].children.empty()); +} + +// Scenario: element_at(struct, 'table_name') in a row filter is localized to the physical file +// child name, matching the struct_element rewrite and nested predicate filter resolution paths. +TEST(ColumnMapperScanRequestTest, + FileLocalElementAtConjunctUsesFileChildNameForRenamedStructField) { + const auto int_type = i32(); + + auto table_a = field_id_col("a", 1, int_type); + auto table_renamed_b = field_id_col("renamed_b", 2, int_type); + auto table_struct = struct_col("s", 10, {table_a, table_renamed_b}); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, int_type, 1); + auto file_struct = struct_col("s", 10, {file_a, file_b}, 10); + + auto child_expr = element_at(table_slot(0, 0, table_struct.type, table_struct.name), int_type, + "renamed_b"); + auto filter_expr = int_gt(child_expr, 0); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.conjuncts.size(), 1); + const auto& localized_child = request.conjuncts[0]->root()->children()[0]; + EXPECT_EQ(localized_child->expr_name(), "element_at"); + const auto* localized_slot = assert_cast(localized_child->children()[0].get()); + EXPECT_EQ(localized_slot->column_name(), "s"); + EXPECT_EQ(localized_slot->column_id(), 0); + + const auto* localized_literal = + assert_cast(localized_child->children()[1].get()); + Field localized_field; + localized_literal->get_column_ptr()->get(0, localized_field); + ASSERT_EQ(localized_field.get_type(), TYPE_STRING); + EXPECT_EQ(std::string(localized_field.as_string_view()), "b"); +} + +// Scenario: nested element_at(struct, name) localization rewrites both selector names and +// intermediate return types. The outer selector must be prepared against the projected file child +// struct, not the table child struct or the full historical file child struct. +TEST(ColumnMapperScanRequestTest, NestedElementAtConjunctUsesFileChildTypeForRenamedLeaf) { + const auto int_type = i32(); + const auto string_type = str(); + + auto table_new_aa = field_id_col("new_aa", 23, int_type); + auto table_bb = field_id_col("bb", 24, string_type); + auto table_new_a = struct_col("new_a", 20, {table_new_aa, table_bb}); + auto table_struct = struct_col("struct_column2", 19, {table_new_a}); + + auto file_aa = field_id_col("aa", 23, int_type, 0); + auto file_bb = field_id_col("bb", 24, string_type, 1); + auto file_new_a = struct_col("new_a", 20, {file_aa, file_bb}, 0); + auto file_struct = struct_col("struct_column2", 19, {file_new_a}, 10); + + const auto table_slot_expr = table_slot(0, 0, table_struct.type, "struct_column2"); + const auto table_parent_expr = element_at(table_slot_expr, table_new_a.type, "new_a"); + const auto table_leaf_expr = element_at(table_parent_expr, int_type, "new_aa"); + auto filter_expr = binary_predicate(TExprOpcode::EQ, table_leaf_expr, + literal(int_type, Field::create_field(50))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok()); + ASSERT_EQ(request.conjuncts.size(), 1); + + const auto& localized_leaf = request.conjuncts[0]->root()->children()[0]; + ASSERT_EQ(localized_leaf->expr_name(), "element_at"); + const auto& localized_parent = localized_leaf->children()[0]; + ASSERT_EQ(localized_parent->expr_name(), "element_at"); + + const auto* localized_leaf_selector = + assert_cast(localized_leaf->children()[1].get()); + Field localized_leaf_field; + localized_leaf_selector->get_column_ptr()->get(0, localized_leaf_field); + ASSERT_EQ(localized_leaf_field.get_type(), TYPE_STRING); + EXPECT_EQ(std::string(localized_leaf_field.as_string_view()), "aa"); + + const auto* localized_parent_type = assert_cast( + remove_nullable(localized_parent->data_type()).get()); + ASSERT_EQ(localized_parent_type->get_elements().size(), 2); + EXPECT_EQ(localized_parent_type->get_element_name(0), "aa"); + EXPECT_EQ(localized_parent_type->get_element_name(1), "bb"); +} + +// Scenario: output projection reads one struct child while the row filter reads a different nested +// struct child. File-local conjunct rewrite must use the merged scan projection type. In the SQL +// shape below, `SELECT element_at(s, 'c') WHERE element_at(element_at(s, 'b'), 'cc') LIKE ...` +// reads file children `b.cc` and `c`; the localized inner `element_at(s, 'b')` returns +// `Struct(cc)`, not the full old file child `Struct(cc, new_dd)`. +TEST(ColumnMapperScanRequestTest, NestedElementAtConjunctUsesMergedScanProjectionChildType) { + const auto string_type = str(); + const auto int_type = i32(); + + auto table_cc = field_id_col("cc", 23, string_type); + auto table_new_dd = field_id_col("new_dd", 24, int_type); + auto table_b = struct_col("b", 20, {table_cc, table_new_dd}); + auto table_c = field_id_col("c", 25, string_type); + auto full_table_struct = struct_col("struct_column2", 19, {table_b, table_c}); + auto projected_table_struct = struct_col("struct_column2", 19, {table_c}); + + auto file_cc = field_id_col("cc", 23, string_type, 0); + auto file_new_dd = field_id_col("new_dd", 24, int_type, 1); + auto file_b = struct_col("b", 20, {file_cc, file_new_dd}, 0); + auto file_c = field_id_col("c", 25, string_type, 1); + auto file_struct = struct_col("new_struct_column", 19, {file_b, file_c}, 10); + + const auto table_slot_expr = table_slot(0, 0, full_table_struct.type, "struct_column2"); + const auto table_parent_expr = element_at(table_slot_expr, table_b.type, "b"); + const auto table_leaf_expr = element_at(table_parent_expr, string_type, "cc"); + auto filter_expr = like_expr(table_leaf_expr, "NestedC%"); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({projected_table_struct}, {}, {file_struct}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {projected_table_struct}, &request).ok()); + ASSERT_EQ(request.conjuncts.size(), 1); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(10)); + + const auto& localized_leaf = request.conjuncts[0]->root()->children()[0]; + ASSERT_EQ(localized_leaf->expr_name(), "element_at"); + const auto& localized_parent = localized_leaf->children()[0]; + ASSERT_EQ(localized_parent->expr_name(), "element_at"); + + const auto* localized_slot = + assert_cast(localized_parent->children()[0].get()); + EXPECT_EQ(localized_slot->column_name(), "new_struct_column"); + // The scan projection keeps the top-level file column id above, while the localized conjunct + // executes on the file-reader Block. The VSlotRef column id is therefore the block position of + // `new_struct_column` in this request, not the file schema id 10. + EXPECT_EQ(localized_slot->column_id(), 0); + + const auto* localized_parent_type = assert_cast( + remove_nullable(localized_parent->data_type()).get()); + ASSERT_EQ(localized_parent_type->get_elements().size(), 1); + EXPECT_EQ(localized_parent_type->get_element_name(0), "cc"); +} + +// Scenario: struct child access through a computed map/array parent is not localized as a file +// conjunct, because the projected value struct can have a different physical child order. +TEST(ColumnMapperScanRequestTest, MapValuesStructChildConjunctStaysTableLevel) { + const auto key_type = str(); + const auto string_type = str(); + const auto int_type = i32(); + + auto table_gender = field_id_col("gender", 17, string_type); + auto table_full_name = field_id_col("full_name", 7, string_type); + auto table_value = struct_col("value", 6, {table_gender, table_full_name}); + auto table_map = map_col("new_map_column", 2, {table_value}, key_type, table_value.type); + + auto file_key = field_id_col("key", 5, key_type, 0); + auto file_age = field_id_col("age", 8, int_type, 0); + auto file_full_name = field_id_col("full_name", 7, string_type, 1); + auto file_gender = field_id_col("gender", 17, string_type, 2); + auto file_value = struct_col("value", 6, {file_age, file_full_name, file_gender}, 1); + auto file_map = + map_col("new_map_column", 2, {file_key, file_value}, key_type, file_value.type, 1); + + const auto map_slot = table_slot(0, 0, table_map.type, "new_map_column"); + const auto values_expr = map_values(map_slot, table_value.type); + const auto first_value = array_element_at(values_expr, table_value.type, 1); + const auto full_name_expr = element_at(first_value, string_type, "full_name"); + auto filter_expr = like_expr(full_name_expr, "J%"); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok()); + + EXPECT_TRUE(request.conjuncts.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1)); + ASSERT_FALSE(request.predicate_columns[0].project_all_children); + ASSERT_EQ(request.predicate_columns[0].children.size(), 1); + EXPECT_EQ(request.predicate_columns[0].children[0].local_id(), 1); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// Scenario: MAP_KEYS only reads map keys, but localizing it by wrapping the evolved file map slot +// in CAST(file_map AS table_map) would still cast the old value struct to the new value struct. +// Keep the conjunct table-level when the map value schema changed. +TEST(ColumnMapperScanRequestTest, MapKeysConjunctWithEvolvedValueStructStaysTableLevel) { + const auto key_type = str(); + const auto string_type = str(); + const auto int_type = i32(); + + auto table_age = field_id_col("age", 8, int_type); + auto table_full_name = field_id_col("full_name", 7, string_type); + auto table_gender = field_id_col("gender", 17, string_type); + auto table_value = struct_col("value", 6, {table_age, table_full_name, table_gender}); + auto table_key = field_id_col("key", 5, key_type); + auto table_map = + map_col("new_map_column", 2, {table_key, table_value}, key_type, table_value.type); + + auto file_key = field_id_col("key", 5, key_type, 0); + auto file_name = field_id_col("name", 18, string_type, 0); + auto file_age = field_id_col("age", 8, int_type, 1); + auto file_value = struct_col("value", 6, {file_name, file_age}, 1); + auto file_map = map_col("map_column", 2, {file_key, file_value}, key_type, file_value.type, 1); + + const auto map_slot = table_slot(0, 0, table_map.type, "new_map_column"); + const auto keys_expr = map_keys(map_slot, key_type); + auto filter_expr = array_contains( + keys_expr, literal(key_type, Field::create_field("person5"))); + TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr), + .global_indices = {GlobalIndex(0)}}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok()); + + EXPECT_TRUE(request.conjuncts.empty()); + EXPECT_TRUE(request.non_predicate_columns.empty()); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1)); + EXPECT_TRUE(request.column_predicate_filters.empty()); +} + +// Scenario: an array element struct projection only contains missing/default children; the mapper +// falls back to reading the full physical element so the reader never gets an empty projection. +TEST(ColumnMapperScanRequestTest, ArrayStructOnlyMissingElementChildUsesFullFileProjection) { + const auto int_type = i32(); + const auto string_type = str(); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, int_type, 1); + auto file_element = struct_col("element", 0, {file_a, file_b}, 0); + auto file_array = array_col("xs", 10, file_element, 10); + + auto missing_child = field_id_col("missing_child", 99, string_type); + auto table_element = struct_col("element", 0, {missing_child}); + auto table_array = array_col("xs", 10, table_element); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(10)); + EXPECT_TRUE(request.non_predicate_columns[0].project_all_children); + EXPECT_TRUE(request.non_predicate_columns[0].children.empty()); + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_FALSE(mapper.mappings()[0].is_trivial); +} + +// Scenario: a map value struct projection only contains missing/default children; the mapper keeps +// the map key/value shape and reads the full physical value struct instead of an empty value child. +TEST(ColumnMapperScanRequestTest, MapValueStructOnlyMissingChildUsesFullValueProjection) { + const auto key_type = i32(); + const auto int_type = i32(); + const auto string_type = str(); + + auto file_key = field_id_col("key", 0, key_type, 0); + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, int_type, 1); + auto file_value = struct_col("value", 1, {file_a, file_b}, 1); + auto file_map = map_col("m", 10, {file_key, file_value}, key_type, file_value.type, 10); + + auto missing_child = field_id_col("missing_child", 99, string_type); + auto table_value = struct_col("value", 1, {missing_child}); + auto table_map = map_col("m", 10, {table_value}, key_type, table_value.type); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_map}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(10)); + ASSERT_FALSE(projection.project_all_children); + ASSERT_EQ(projection.children.size(), 1); + EXPECT_EQ(projection.children[0].local_id(), 1); + EXPECT_TRUE(projection.children[0].project_all_children); + EXPECT_TRUE(projection.children[0].children.empty()); + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_FALSE(mapper.mappings()[0].is_trivial); +} + +// ---------------------------------------------------------------------- +// L1 complex schema evolution and split isolation. +// These tests call the mapper repeatedly with different file schemas and +// verify that split-local state is rebuilt instead of leaked. +// ---------------------------------------------------------------------- + +TEST(ColumnMapperSchemaEvolutionTest, StructChildrenHandleMissingRenameReorderAndDroppedFields) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = field_id_col("a", 1, int_type); + auto table_renamed_b = field_id_col("renamed_b", 2, string_type); + auto table_c = field_id_col("c", 3, int_type); + auto table_struct = struct_col("s", 10, {table_a, table_renamed_b, table_c}); + + auto v1_a = field_id_col("a", 1, int_type, 0); + auto v1_b = field_id_col("b", 2, string_type, 1); + auto file_v1 = struct_col("s", 10, {v1_a, v1_b}, 5); + + auto v2_b = field_id_col("b", 2, string_type, 0); + auto v2_a = field_id_col("a", 1, int_type, 1); + auto v2_c = field_id_col("c", 3, int_type, 2); + auto file_v2 = struct_col("s", 10, {v2_b, v2_a, v2_c}, 8); + + TableColumnMapper v1_mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(v1_mapper.create_mapping({table_struct}, {}, {file_v1}).ok()); + FileScanRequest v1_request; + ASSERT_TRUE(v1_mapper.create_scan_request({}, {}, {table_struct}, &v1_request).ok()); + + const auto& v1_mapping = v1_mapper.mappings()[0]; + ASSERT_EQ(v1_mapping.child_mappings.size(), 3); + EXPECT_EQ(*v1_mapping.child_mappings[0].file_local_id, 0); + EXPECT_EQ(*v1_mapping.child_mappings[1].file_local_id, 1); + EXPECT_FALSE(v1_mapping.child_mappings[2].file_local_id.has_value()); + ASSERT_EQ(v1_request.non_predicate_columns.size(), 1); + EXPECT_EQ(v1_request.non_predicate_columns[0].column_id(), LocalColumnId(5)); + EXPECT_TRUE(v1_request.non_predicate_columns[0].project_all_children); + + TableColumnMapper v2_mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(v2_mapper.create_mapping({table_struct}, {}, {file_v2}).ok()); + FileScanRequest v2_request; + ASSERT_TRUE(v2_mapper.create_scan_request({}, {}, {table_struct}, &v2_request).ok()); + + const auto& v2_mapping = v2_mapper.mappings()[0]; + ASSERT_EQ(v2_mapping.child_mappings.size(), 3); + EXPECT_EQ(*v2_mapping.child_mappings[0].file_local_id, 1); + EXPECT_EQ(*v2_mapping.child_mappings[1].file_local_id, 0); + EXPECT_EQ(*v2_mapping.child_mappings[2].file_local_id, 2); + ASSERT_EQ(v2_request.non_predicate_columns.size(), 1); + EXPECT_EQ(v2_request.non_predicate_columns[0].column_id(), LocalColumnId(8)); + EXPECT_TRUE(v2_request.non_predicate_columns[0].project_all_children); +} + +TEST(ColumnMapperSchemaEvolutionTest, DroppedStructChildrenAreNotRead) { + const auto int_type = i32(); + const auto string_type = str(); + auto table_a = field_id_col("a", 1, int_type); + auto table_struct = struct_col("s", 10, {table_a}); + + auto file_a = field_id_col("a", 1, int_type, 0); + auto file_b = field_id_col("b", 2, string_type, 1); + auto file_c = field_id_col("c", 3, int_type, 2); + auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID}); + ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok()); + + FileScanRequest request; + ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok()); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(5)); + ASSERT_FALSE(projection.project_all_children); + EXPECT_EQ(projection_ids(projection.children), std::vector({0})); +} + +TEST(ColumnMapperSchemaEvolutionTest, ReusedMapperClearsSplitLocalConstantsAndFileIds) { + const auto int_type = i32(); + auto id = name_col("id", int_type); + auto added = name_col("added", int_type); + added.default_expr = + VExprContext::create_shared(literal(int_type, Field::create_field(7))); + const std::vector table_schema = {id, added}; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, {name_col("id", int_type, 0)}).ok()); + ASSERT_EQ(mapper.mappings().size(), 2); + EXPECT_EQ(*mapper.mappings()[0].file_local_id, 0); + expect_constant(mapper, mapper.mappings()[1], 1, int_type); + + ASSERT_TRUE(mapper.create_mapping(table_schema, {}, + {name_col("id", int_type, 3), name_col("added", int_type, 4)}) + .ok()); + ASSERT_EQ(mapper.mappings().size(), 2); + EXPECT_EQ(*mapper.mappings()[0].file_local_id, 3); + EXPECT_EQ(*mapper.mappings()[1].file_local_id, 4); + EXPECT_TRUE(mapper.constant_map().empty()); +} + +// ---------------------------------------------------------------------- +// L2 cast-aware filter localization tests. +// These tests belong to TableColumnMapper rather than Cast: they assert when the mapper builds +// projection casts, rewrites table predicates to file-local slot casts, converts literals to the +// current split's file type, and keeps repeated scan-request rewrites idempotent. +// ---------------------------------------------------------------------- + +// Scenario: table/file primitive types differ, so the visible mapping must build a cast projection. +TEST_F(ColumnMapperCastTest, ColumnMapperBuildsCastProjectionForTypeMismatch) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(mapper.mappings().size(), 1); + FileScanRequest file_request; + status = mapper.create_scan_request({}, {}, projected_columns, &file_request); + ASSERT_TRUE(status.ok()) << status; + const auto& mapping = mapper.mappings()[0]; + EXPECT_FALSE(mapping.is_trivial); + ASSERT_NE(mapping.projection, nullptr); + + Block block; + block.insert(ColumnHelper::create_column_with_name({11, 22})); + int result_column_id = -1; + status = prepare_open_execute(mapping.projection.get(), &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + const auto& result_column = + assert_cast(*block.get_by_position(result_column_id).column); + EXPECT_EQ(result_column.get_data()[0], 11); + EXPECT_EQ(result_column.get_data()[1], 22); + + mapping.projection->close(); +} + +// Scenario: equivalent table/file types keep the mapping trivial and avoid unnecessary projection casts. +TEST_F(ColumnMapperCastTest, ColumnMapperTreatsEquivalentTypesAsTrivial) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i32()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(mapper.mappings().size(), 1); + EXPECT_TRUE(mapper.mappings()[0].is_trivial); +} + +// Scenario: a table predicate on a widened type is localized by casting the file slot to table type. +TEST_F(ColumnMapperCastTest, ColumnMapperBuildsCastFilterForTypeMismatch) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = std::make_shared(15); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + ASSERT_EQ(projection_ids(file_request.predicate_columns), std::vector({0})); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 1); + const auto& localized_child = localized_expr->children()[0]; + ASSERT_NE(dynamic_cast(localized_child.get()), nullptr); + ASSERT_EQ(localized_child->get_num_children(), 1); + const auto* localized_slot = assert_cast(localized_child->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + EXPECT_TRUE(localized_child->data_type()->equals(*table_column.type)); + + Block block; + block.insert(ColumnHelper::create_column_with_name({11, 22})); + auto* conjunct = file_request.conjuncts[0].get(); + status = conjunct->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = conjunct->open(&state); + ASSERT_TRUE(status.ok()) << status; + IColumn::Filter filter(block.rows(), 1); + bool can_filter_all = false; + status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all); + ASSERT_TRUE(status.ok()) << status; + EXPECT_FALSE(can_filter_all); + ASSERT_EQ(filter.size(), 2); + EXPECT_EQ(filter[0], 0); + EXPECT_EQ(filter[1], 1); + + file_request.conjuncts[0]->close(); +} + +// Scenario: an already prepared table filter can still be cloned, rewritten, prepared, and opened as a file-local filter. +TEST_F(ColumnMapperCastTest, ColumnMapperRepreparesRewrittenPreparedFilter) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto cast = Cast::create_shared(table_column.type); + cast->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(cast); + table_filter.global_indices = {GlobalIndex(0)}; + status = table_filter.conjunct->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = table_filter.conjunct->open(&state); + ASSERT_TRUE(status.ok()) << status; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_NE(dynamic_cast(localized_expr.get()), nullptr); + ASSERT_EQ(localized_expr->get_num_children(), 1); + const auto* localized_slot = assert_cast(localized_expr->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + + status = file_request.conjuncts[0]->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = file_request.conjuncts[0]->open(&state); + ASSERT_TRUE(status.ok()) << status; + + file_request.conjuncts[0]->close(); +} + +// Scenario: slot-literal comparison rewrites the literal to the current file type when conversion is safe. +TEST_F(ColumnMapperCastTest, ColumnMapperCastsLiteralForSlotLiteralPredicateTypeMismatch) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = std::make_shared(TExprOpcode::GT); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(15))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + ASSERT_EQ(projection_ids(file_request.predicate_columns), std::vector({0})); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 2); + const auto* localized_slot = assert_cast(localized_expr->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + const auto& localized_literal = localized_expr->children()[1]; + EXPECT_TRUE(localized_literal->is_literal()); + EXPECT_TRUE(localized_literal->data_type()->equals(*file_field.type)); + + Block block; + block.insert(ColumnHelper::create_column_with_name({11, 22})); + auto* conjunct = file_request.conjuncts[0].get(); + status = conjunct->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = conjunct->open(&state); + ASSERT_TRUE(status.ok()) << status; + IColumn::Filter filter(block.rows(), 1); + bool can_filter_all = false; + status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all); + ASSERT_TRUE(status.ok()) << status; + EXPECT_FALSE(can_filter_all); + ASSERT_EQ(filter.size(), 2); + EXPECT_EQ(filter[0], 0); + EXPECT_EQ(filter[1], 1); + + file_request.conjuncts[0]->close(); +} + +// Scenario: literal-slot comparison also rewrites the literal side and preserves operand order. +TEST_F(ColumnMapperCastTest, ColumnMapperCastsLiteralForLiteralSlotPredicateTypeMismatch) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = std::make_shared(TExprOpcode::LT); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(15))); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 2); + const auto& localized_literal = localized_expr->children()[0]; + EXPECT_TRUE(localized_literal->is_literal()); + EXPECT_TRUE(localized_literal->data_type()->equals(*file_field.type)); + const auto* localized_slot = assert_cast(localized_expr->children()[1].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + + Block block; + block.insert(ColumnHelper::create_column_with_name({11, 22})); + auto* conjunct = file_request.conjuncts[0].get(); + status = conjunct->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = conjunct->open(&state); + ASSERT_TRUE(status.ok()) << status; + IColumn::Filter filter(block.rows(), 1); + bool can_filter_all = false; + status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all); + ASSERT_TRUE(status.ok()) << status; + EXPECT_FALSE(can_filter_all); + ASSERT_EQ(filter.size(), 2); + EXPECT_EQ(filter[0], 0); + EXPECT_EQ(filter[1], 1); + + file_request.conjuncts[0]->close(); +} + +// Scenario: IN predicate literals are all rewritten to file type when every literal conversion is safe. +TEST_F(ColumnMapperCastTest, ColumnMapperCastsInPredicateLiteralsForTypeMismatch) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = create_in_predicate(); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(15))); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(22))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + ASSERT_EQ(projection_ids(file_request.predicate_columns), std::vector({0})); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 3); + const auto* localized_slot = assert_cast(localized_expr->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + EXPECT_TRUE(localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(localized_expr->children()[1]->data_type()->equals(*file_field.type)); + EXPECT_TRUE(localized_expr->children()[2]->is_literal()); + EXPECT_TRUE(localized_expr->children()[2]->data_type()->equals(*file_field.type)); +} + +// Scenario: IN predicate falls back to casting the file slot when any literal cannot be converted safely. +TEST_F(ColumnMapperCastTest, ColumnMapperFallsBackToSlotCastWhenInPredicateLiteralRewriteFails) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", str()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = create_in_predicate(); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field("10"))); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field("bad"))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 3); + const auto& localized_child = localized_expr->children()[0]; + ASSERT_NE(dynamic_cast(localized_child.get()), nullptr); + ASSERT_EQ(localized_child->get_num_children(), 1); + const auto* localized_slot = assert_cast(localized_child->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + EXPECT_TRUE(localized_child->data_type()->equals(*table_column.type)); + EXPECT_TRUE(localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(localized_expr->children()[1]->data_type()->equals(*table_column.type)); + EXPECT_TRUE(localized_expr->children()[2]->is_literal()); + EXPECT_TRUE(localized_expr->children()[2]->data_type()->equals(*table_column.type)); +} + +// Scenario: split-local IN literal rewrites do not mutate the original table filter across different file schemas. +TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotLeakRewrittenInPredicateLiteralAcrossSplits) { + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto predicate = create_in_predicate(); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(15))); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(22))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + auto int_file_field = name_col("value", i32(), 0); + TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(int_mapper.create_mapping(projected_columns, {}, {int_file_field}).ok()); + FileScanRequest int_request; + ASSERT_TRUE(int_mapper + .create_scan_request({table_filter}, {}, projected_columns, &int_request, + &state) + .ok()); + ASSERT_EQ(int_request.conjuncts.size(), 1); + const auto& int_localized_expr = int_request.conjuncts[0]->root(); + ASSERT_EQ(int_localized_expr->get_num_children(), 3); + EXPECT_TRUE(int_localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(int_localized_expr->children()[1]->data_type()->equals(*int_file_field.type)); + EXPECT_TRUE(int_localized_expr->children()[2]->is_literal()); + EXPECT_TRUE(int_localized_expr->children()[2]->data_type()->equals(*int_file_field.type)); + + auto bigint_file_field = name_col("value", i64(), 0); + TableColumnMapper bigint_mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(bigint_mapper.create_mapping(projected_columns, {}, {bigint_file_field}).ok()); + FileScanRequest bigint_request; + ASSERT_TRUE(bigint_mapper + .create_scan_request({table_filter}, {}, projected_columns, &bigint_request, + &state) + .ok()); + ASSERT_EQ(bigint_request.conjuncts.size(), 1); + const auto& bigint_localized_expr = bigint_request.conjuncts[0]->root(); + ASSERT_EQ(bigint_localized_expr->get_num_children(), 3); + const auto* localized_slot = + assert_cast(bigint_localized_expr->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*bigint_file_field.type)); + EXPECT_TRUE(bigint_localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(bigint_localized_expr->children()[1]->data_type()->equals(*bigint_file_field.type)); + EXPECT_TRUE(bigint_localized_expr->children()[2]->is_literal()); + EXPECT_TRUE(bigint_localized_expr->children()[2]->data_type()->equals(*bigint_file_field.type)); +} + +// Scenario: binary predicate falls back to casting the file slot when literal conversion fails. +TEST_F(ColumnMapperCastTest, ColumnMapperFallsBackToSlotCastWhenLiteralRewriteFails) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", str()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = std::make_shared(TExprOpcode::GT); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field("bad"))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 2); + const auto& localized_child = localized_expr->children()[0]; + ASSERT_NE(dynamic_cast(localized_child.get()), nullptr); + ASSERT_EQ(localized_child->get_num_children(), 1); + const auto* localized_slot = assert_cast(localized_child->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); + EXPECT_TRUE(localized_child->data_type()->equals(*table_column.type)); + EXPECT_TRUE(localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(localized_expr->children()[1]->data_type()->equals(*table_column.type)); +} + +// Scenario: split-local binary literal rewrite does not leak into a later split with a different file type. +TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotLeakRewrittenLiteralAcrossSplits) { + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto predicate = std::make_shared(TExprOpcode::GT); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(15))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + auto int_file_field = name_col("value", i32(), 0); + TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(int_mapper.create_mapping(projected_columns, {}, {int_file_field}).ok()); + FileScanRequest int_request; + ASSERT_TRUE(int_mapper + .create_scan_request({table_filter}, {}, projected_columns, &int_request, + &state) + .ok()); + ASSERT_EQ(int_request.conjuncts.size(), 1); + const auto& int_localized_expr = int_request.conjuncts[0]->root(); + ASSERT_EQ(int_localized_expr->get_num_children(), 2); + EXPECT_TRUE(int_localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(int_localized_expr->children()[1]->data_type()->equals(*int_file_field.type)); + + auto bigint_file_field = name_col("value", i64(), 0); + TableColumnMapper bigint_mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(bigint_mapper.create_mapping(projected_columns, {}, {bigint_file_field}).ok()); + FileScanRequest bigint_request; + ASSERT_TRUE(bigint_mapper + .create_scan_request({table_filter}, {}, projected_columns, &bigint_request, + &state) + .ok()); + ASSERT_EQ(bigint_request.conjuncts.size(), 1); + const auto& bigint_localized_expr = bigint_request.conjuncts[0]->root(); + ASSERT_EQ(bigint_localized_expr->get_num_children(), 2); + const auto* localized_slot = + assert_cast(bigint_localized_expr->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*bigint_file_field.type)); + EXPECT_TRUE(bigint_localized_expr->children()[1]->is_literal()); + EXPECT_TRUE(bigint_localized_expr->children()[1]->data_type()->equals(*bigint_file_field.type)); +} + +// Scenario: an explicit user/table cast is preserved while the underlying slot is localized correctly. +TEST_F(ColumnMapperCastTest, ColumnMapperKeepsExplicitSlotCastInSlotLiteralPredicate) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto explicit_cast = Cast::create_shared(std::make_shared()); + explicit_cast->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + auto predicate = std::make_shared(TExprOpcode::GT); + predicate->add_child(explicit_cast); + predicate->add_child( + VLiteral::create_shared(table_column.type, Field::create_field(15))); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request, &state) + .ok()); + ASSERT_EQ(file_request.conjuncts.size(), 1); + const auto& localized_expr = file_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 2); + const auto& localized_cast = localized_expr->children()[0]; + ASSERT_NE(dynamic_cast(localized_cast.get()), nullptr); + EXPECT_TRUE(localized_cast->data_type()->equals(DataTypeString())); + ASSERT_EQ(localized_cast->get_num_children(), 1); + ASSERT_NE(dynamic_cast(localized_cast->children()[0].get()), nullptr); + const auto* localized_slot = + assert_cast(localized_cast->children()[0]->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*file_field.type)); +} + +// Scenario: repeated scan request creation stays idempotent and does not wrap Cast(Cast(slot)). +TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotNestCastFilterAcrossScanRequests) { + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i32(), 0); + std::vector file_schema {file_field}; + + auto status = mapper.create_mapping(projected_columns, {}, file_schema); + ASSERT_TRUE(status.ok()) << status; + + auto predicate = std::make_shared(15); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest first_request; + ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projected_columns, &first_request, + &state) + .ok()); + FileScanRequest second_request; + ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projected_columns, &second_request, + &state) + .ok()); + + ASSERT_EQ(second_request.conjuncts.size(), 1); + const auto& localized_expr = second_request.conjuncts[0]->root(); + ASSERT_EQ(localized_expr->get_num_children(), 1); + const auto& localized_child = localized_expr->children()[0]; + ASSERT_NE(dynamic_cast(localized_child.get()), nullptr); + ASSERT_EQ(localized_child->get_num_children(), 1); + const auto* localized_slot = assert_cast(localized_child->children()[0].get()); + EXPECT_EQ(localized_slot->column_id(), 0); +} + +// Scenario: a filter cloned from a previous cast rewrite is adjusted to the next split's matching file type. +TEST_F(ColumnMapperCastTest, ColumnMapperRewritesPreviousCastFilterToMatchingSplitType) { + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto predicate = std::make_shared(15); + predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_column.type, "value")); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + auto int_file_field = name_col("value", i32(), 0); + + TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(int_mapper.create_mapping(projected_columns, {}, {int_file_field}).ok()); + FileScanRequest int_request; + ASSERT_TRUE(int_mapper + .create_scan_request({table_filter}, {}, projected_columns, &int_request, + &state) + .ok()); + + const auto& int_localized_expr = int_request.conjuncts[0]->root(); + ASSERT_EQ(int_localized_expr->get_num_children(), 1); + ASSERT_NE(dynamic_cast(int_localized_expr->children()[0].get()), nullptr); + + auto bigint_file_field = name_col("value", i64(), 0); + + TableColumnMapper bigint_mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(bigint_mapper.create_mapping(projected_columns, {}, {bigint_file_field}).ok()); + FileScanRequest bigint_request; + ASSERT_TRUE(bigint_mapper + .create_scan_request({table_filter}, {}, projected_columns, &bigint_request, + &state) + .ok()); + + const auto& bigint_localized_expr = bigint_request.conjuncts[0]->root(); + ASSERT_EQ(bigint_localized_expr->get_num_children(), 1); + const auto& bigint_localized_child = bigint_localized_expr->children()[0]; + const auto* localized_slot = assert_cast(bigint_localized_child.get()); + EXPECT_EQ(localized_slot->column_id(), 0); + EXPECT_TRUE(localized_slot->data_type()->equals(*bigint_file_field.type)); + + Block block; + block.insert(ColumnHelper::create_column_with_name({11, 22})); + auto* conjunct = bigint_request.conjuncts[0].get(); + auto status = conjunct->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = conjunct->open(&state); + ASSERT_TRUE(status.ok()) << status; + IColumn::Filter filter(block.rows(), 1); + bool can_filter_all = false; + status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all); + ASSERT_TRUE(status.ok()) << status; + EXPECT_FALSE(can_filter_all); + ASSERT_EQ(filter.size(), 2); + EXPECT_EQ(filter[0], 0); + EXPECT_EQ(filter[1], 1); + conjunct->close(); +} + +// Scenario: localized slot keeps table slot id while column id tracks the file block position. +TEST_F(ColumnMapperCastTest, ColumnMapperKeepsTableSlotIdWhenFileBlockPositionChanges) { + auto table_column = name_col("value", i64()); + std::vector projected_columns {table_column}; + + auto file_field = name_col("value", i64(), 10); + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, {file_field}).ok()); + + auto predicate = std::make_shared(15); + predicate->add_child(VSlotRef::create_shared(7, 0, -1, table_column.type, "value")); + TableFilter table_filter; + table_filter.conjunct = VExprContext::create_shared(predicate); + table_filter.global_indices = {GlobalIndex(0)}; + + FileScanRequest first_request; + ASSERT_TRUE(mapper.localize_filters({table_filter}, {}, &first_request, &state).ok()); + ASSERT_EQ(first_request.conjuncts.size(), 1); + const auto* first_slot = + assert_cast(first_request.conjuncts[0]->root()->children()[0].get()); + EXPECT_EQ(first_slot->slot_id(), 7); + EXPECT_EQ(first_slot->column_id(), 0); + + FileScanRequest second_request; + second_request.local_positions.emplace(LocalColumnId(9), LocalIndex(0)); + second_request.local_positions.emplace(LocalColumnId(10), LocalIndex(1)); + second_request.non_predicate_columns.push_back(LocalColumnIndex::top_level(LocalColumnId(9))); + ASSERT_TRUE(mapper.localize_filters({table_filter}, {}, &second_request, &state).ok()); + ASSERT_EQ(second_request.conjuncts.size(), 1); + const auto* second_slot = + assert_cast(second_request.conjuncts[0]->root()->children()[0].get()); + EXPECT_EQ(second_slot->slot_id(), 7); + EXPECT_EQ(second_slot->column_id(), 1); + + Block block; + block.insert(ColumnHelper::create_column_with_name({100, 100})); + block.insert(ColumnHelper::create_column_with_name({11, 22})); + auto* conjunct = second_request.conjuncts[0].get(); + auto status = conjunct->prepare(&state, RowDescriptor()); + ASSERT_TRUE(status.ok()) << status; + status = conjunct->open(&state); + ASSERT_TRUE(status.ok()) << status; + IColumn::Filter filter(block.rows(), 1); + bool can_filter_all = false; + status = conjunct->execute_filter(&block, filter.data(), block.rows(), false, &can_filter_all); + ASSERT_TRUE(status.ok()) << status; + EXPECT_FALSE(can_filter_all); + ASSERT_EQ(filter.size(), 2); + EXPECT_EQ(filter[0], 0); + EXPECT_EQ(filter[1], 1); + conjunct->close(); +} + +} // namespace +} // namespace doris::format diff --git a/be/test/format_v2/delimited_text/csv_reader_test.cpp b/be/test/format_v2/delimited_text/csv_reader_test.cpp new file mode 100644 index 00000000000000..7c787de7f8c09a --- /dev/null +++ b/be/test/format_v2/delimited_text/csv_reader_test.cpp @@ -0,0 +1,1070 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/delimited_text/csv_reader.h" + +#include + +#include +#include +#include +#include + +#include "common/consts.h" +#include "common/object_pool.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "format_v2/column_mapper.h" +#include "io/io_common.h" +#include "runtime/runtime_profile.h" +#include "testutil/desc_tbl_builder.h" +#include "testutil/mock/mock_runtime_state.h" + +namespace doris::format::csv { +namespace { + +TFileScanRangeParams csv_scan_params() { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN); + params.__set_file_type(TFileType::FILE_LOCAL); + TFileAttributes attributes; + TFileTextScanRangeParams text_params; + text_params.__set_column_separator(","); + text_params.__set_line_delimiter("\n"); + attributes.__set_text_params(std::move(text_params)); + attributes.__set_header_type(BeConsts::CSV_WITH_NAMES); + params.__set_file_attributes(std::move(attributes)); + params.__set_column_idxs({0, 1, 2}); + return params; +} + +std::unique_ptr file_description(const std::string& path, + int64_t range_start_offset = 0, + int64_t range_size = -1) { + auto desc = std::make_unique(); + desc->path = path; + desc->range_start_offset = range_start_offset; + desc->range_size = range_size; + desc->file_size = static_cast(std::filesystem::file_size(path)); + return desc; +} + +std::unique_ptr unknown_size_file_description(const std::string& path) { + auto desc = std::make_unique(); + desc->path = path; + desc->range_start_offset = 0; + desc->range_size = -1; + desc->file_size = -1; + return desc; +} + +std::vector build_slots(ObjectPool* pool) { + DescriptorTblBuilder builder(pool); + builder.declare_tuple() + << TupleDescBuilder::SlotType {make_nullable(std::make_shared()), "id"} + << TupleDescBuilder::SlotType {make_nullable(std::make_shared()), + "name"} + << TupleDescBuilder::SlotType {make_nullable(std::make_shared()), + "score"}; + auto* desc_tbl = builder.build(); + return desc_tbl->get_tuple_descriptor(0)->slots(); +} + +SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type, + const std::string& name) { + TSlotDescriptor slot_desc; + slot_desc.__set_id(slot_id); + slot_desc.__set_parent(0); + slot_desc.__set_slotType(type->to_thrift()); + slot_desc.__set_columnPos(slot_idx); + slot_desc.__set_byteOffset(0); + slot_desc.__set_nullIndicatorByte(slot_idx / 8); + slot_desc.__set_nullIndicatorBit(slot_idx % 8); + slot_desc.__set_slotIdx(slot_idx); + slot_desc.__set_isMaterialized(true); + slot_desc.__set_colName(name); + return pool->add(new SlotDescriptor(slot_desc)); +} + +std::vector build_struct_slots(ObjectPool* pool) { + const auto nullable_int = make_nullable(std::make_shared()); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {nullable_int, nullable_int}, Strings {"a", "b"})); + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, struct_type, "s"), + make_test_slot(pool, 2, 2, make_nullable(std::make_shared()), "score")}; +} + +std::vector build_nested_complex_slots(ObjectPool* pool) { + const auto nullable_int = make_nullable(std::make_shared()); + const auto nullable_string = make_nullable(std::make_shared()); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {nullable_int, nullable_string}, Strings {"a", "b"})); + const auto array_type = make_nullable(std::make_shared(struct_type)); + const auto map_type = + make_nullable(std::make_shared(nullable_string, struct_type)); + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, array_type, "xs"), + make_test_slot(pool, 2, 2, map_type, "kv")}; +} + +std::vector build_char_varchar_slots(ObjectPool* pool) { + const auto nullable_char3 = + make_nullable(std::make_shared(3, PrimitiveType::TYPE_CHAR)); + const auto nullable_varchar4 = + make_nullable(std::make_shared(4, PrimitiveType::TYPE_VARCHAR)); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {nullable_char3, nullable_varchar4}, Strings {"city", "country"})); + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, nullable_char3, "city"), + make_test_slot(pool, 2, 2, struct_type, "region")}; +} + +std::unique_ptr create_reader( + const std::string& path, TFileScanRangeParams* params, + const std::vector& slots, MockRuntimeState* state, RuntimeProfile* profile, + int64_t range_start_offset = 0, int64_t range_size = -1, + TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN, + std::shared_ptr io_ctx = nullptr) { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(path, range_start_offset, range_size); + auto reader = std::make_unique(system_properties, desc, std::move(io_ctx), profile, + params, slots, range_compress_type); + EXPECT_TRUE(reader->init(state).ok()); + return reader; +} + +std::unique_ptr create_unknown_size_reader(const std::string& path, + TFileScanRangeParams* params, + const std::vector& slots, + MockRuntimeState* state, + RuntimeProfile* profile) { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = unknown_size_file_description(path); + auto reader = + std::make_unique(system_properties, desc, nullptr, profile, params, slots); + EXPECT_TRUE(reader->init(state).ok()); + return reader; +} + +Block make_block(const std::vector& schema, + const std::vector& local_ids) { + Block block; + for (const auto local_id : local_ids) { + const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) { + return column.local_id == local_id; + }); + EXPECT_TRUE(it != schema.end()); + block.insert({it->type->create_column(), it->type, it->name}); + } + return block; +} + +std::string nullable_string_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data_at(row).to_string(); +} + +bool is_null_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + return nullable.is_null_at(row); +} + +int32_t nullable_int_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data()[row]; +} + +int32_t nullable_struct_int_child_at(const IColumn& column, size_t child_index, size_t row) { + const auto& nullable = assert_cast(column); + const auto& struct_column = assert_cast(nullable.get_nested_column()); + const auto& child_nullable = + assert_cast(struct_column.get_column(child_index)); + const auto& nested = assert_cast(child_nullable.get_nested_column()); + return nested.get_data()[row]; +} + +int64_t counter_value(RuntimeProfile* profile, const std::string& name) { + auto* counter = profile->get_counter(name); + EXPECT_NE(counter, nullptr) << name; + return counter == nullptr ? 0 : counter->value(); +} + +class NullableIntGreaterThanExpr final : public VExpr { +public: + NullableIntGreaterThanExpr(size_t block_position, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& data = assert_cast(nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = + !nullable.is_null_at(source_row) && data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, _value); + return Status::OK(); + } + +private: + size_t _block_position; + int32_t _value; + const std::string _name = "NullableIntGreaterThanExpr"; +}; + +class StructIntChildGreaterThanExpr final : public VExpr { +public: + StructIntChildGreaterThanExpr(size_t block_position, size_t child_index, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _child_index(child_index), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& struct_column = assert_cast(nullable.get_nested_column()); + const auto& child_nullable = + assert_cast(struct_column.get_column(_child_index)); + const auto& child_data = + assert_cast(child_nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& data = result->get_data(); + data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + data[row] = !nullable.is_null_at(source_row) && + !child_nullable.is_null_at(source_row) && + child_data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, + _child_index, _value); + return Status::OK(); + } + +private: + size_t _block_position; + size_t _child_index; + int32_t _value; + const std::string _name = "StructIntChildGreaterThanExpr"; +}; + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto context = VExprContext::create_shared(expr); + auto status = context->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = context->open(state); + EXPECT_TRUE(status.ok()) << status; + return context; +} + +class CsvV2ReaderTest : public testing::Test { +public: + void SetUp() override { + _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_csv_reader_test"; + std::filesystem::remove_all(_test_dir); + std::filesystem::create_directories(_test_dir); + _file_path = (_test_dir / "reader.csv").string(); + std::ofstream output(_file_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,alice,10\n"; + output << "2,bob,20\n"; + output.close(); + _slots = build_slots(&_pool); + _params = csv_scan_params(); + } + + void TearDown() override { std::filesystem::remove_all(_test_dir); } + +protected: + ObjectPool _pool; + MockRuntimeState _state; + RuntimeProfile _profile {"csv_v2_reader_test"}; + std::filesystem::path _test_dir; + std::string _file_path; + std::vector _slots; + TFileScanRangeParams _params; +}; + +// Scenario: CSV v2 exposes FE-provided file slots as nullable file-local schema using column_idxs +// as CSV field ordinals. +TEST_F(CsvV2ReaderTest, SchemaUsesSlotTypesAndColumnIdxs) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + EXPECT_EQ(schema[0].name, "id"); + EXPECT_EQ(schema[0].local_id, 0); + EXPECT_TRUE(schema[0].type->is_nullable()); + EXPECT_EQ(schema[1].name, "name"); + EXPECT_EQ(schema[1].local_id, 1); + EXPECT_TRUE(schema[1].type->is_nullable()); +} + +// Scenario: FE slot types for CSV are table target types. CHAR/VARCHAR length is not stored in the +// CSV file, so the file schema must expose bounded strings as unbounded STRING. Otherwise +// TableReader believes the file value already satisfies the table length and skips truncation. +TEST_F(CsvV2ReaderTest, SchemaTreatsCharVarcharSlotsAsUnboundedFileStrings) { + auto slots = build_char_varchar_slots(&_pool); + auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + + const auto city_type = remove_nullable(schema[1].type); + EXPECT_EQ(city_type->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(assert_cast(city_type.get())->len(), -1); + + const auto region_type = remove_nullable(schema[2].type); + ASSERT_EQ(region_type->get_primitive_type(), TYPE_STRUCT); + const auto* region_struct = assert_cast(region_type.get()); + ASSERT_EQ(region_struct->get_elements().size(), 2); + EXPECT_EQ(remove_nullable(region_struct->get_element(0))->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(remove_nullable(region_struct->get_element(1))->get_primitive_type(), TYPE_STRING); + ASSERT_EQ(schema[2].children.size(), 2); + EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING); +} + +// Scenario: CSV is row-oriented and cannot lazy-read predicate columns separately. The reader +// declares that capability by choosing MaterializedColumnMapper itself. +TEST_F(CsvV2ReaderTest, CreatesMaterializedColumnMapper) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto mapper = reader->create_column_mapper({.mode = TableColumnMappingMode::BY_NAME}); + + ASSERT_NE(dynamic_cast(mapper.get()), nullptr); +} + +// Scenario: CSV v2 exposes delimited-text profile counters for read, parse, deserialize, and +// file-local conjunct filtering, so scanner profiles can explain where row-reader time is spent. +TEST_F(CsvV2ReaderTest, ProfileCountersTrackReadParseDeserializeAndFilter) { + const auto profile_path = (_test_dir / "profile.csv").string(); + std::ofstream output(profile_path, std::ios::binary); + output << "id,name,score\n"; + output << "\n"; + output << "1,alice,10\n"; + output << "2,bob,20\n"; + output.close(); + + _state._query_options.__set_read_csv_empty_line_as_null(true); + auto io_ctx = std::make_shared(); + auto reader = create_reader(profile_path, &_params, _slots, &_state, &_profile, 0, -1, + TFileCompressType::UNKNOWN, io_ctx); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(2), LocalIndex(1)); + request->conjuncts = { + prepared_conjunct(&_state, std::make_shared(1, 15))}; + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0, 2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2); + + EXPECT_NE(_profile.get_counter("OpenFileTime"), nullptr); + EXPECT_NE(_profile.get_counter("CreateLineReaderTime"), nullptr); + EXPECT_NE(_profile.get_counter("ReadLineTime"), nullptr); + EXPECT_NE(_profile.get_counter("SplitLineTime"), nullptr); + EXPECT_NE(_profile.get_counter("DeserializeTime"), nullptr); + EXPECT_NE(_profile.get_counter("ConjunctFilterTime"), nullptr); + EXPECT_NE(_profile.get_counter("DeleteConjunctFilterTime"), nullptr); + EXPECT_EQ(counter_value(&_profile, "RawLinesRead"), 3); + EXPECT_EQ(counter_value(&_profile, "RowsReadBeforeFilter"), 3); + EXPECT_EQ(counter_value(&_profile, "RowsFilteredByConjunct"), 2); + EXPECT_EQ(io_ctx->predicate_filtered_rows, 2); + EXPECT_EQ(counter_value(&_profile, "RowsFilteredByDeleteConjunct"), 0); + EXPECT_EQ(counter_value(&_profile, "RowsReturned"), 1); + EXPECT_EQ(counter_value(&_profile, "EmptyLinesRead"), 1); + EXPECT_EQ(counter_value(&_profile, "SkippedLines"), 1); + EXPECT_EQ(counter_value(&_profile, "CellsDeserialized"), 6); +} + +// Scenario: CSV has no embedded nested schema, but TableColumnMapper still needs semantic children +// for complex table columns. The reader synthesizes ARRAY/MAP/STRUCT children from the slot type +// while keeping the top-level local id as the CSV field ordinal from column_idxs. +TEST_F(CsvV2ReaderTest, SchemaSynthesizesComplexChildrenForColumnMapper) { + _params.__set_column_idxs({4, 7, 9}); + auto slots = build_nested_complex_slots(&_pool); + auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + + EXPECT_EQ(schema[1].name, "xs"); + EXPECT_EQ(schema[1].local_id, 7); + ASSERT_EQ(schema[1].children.size(), 1); + EXPECT_EQ(schema[1].children[0].name, "element"); + EXPECT_EQ(schema[1].children[0].local_id, 0); + ASSERT_EQ(schema[1].children[0].children.size(), 2); + EXPECT_EQ(schema[1].children[0].children[0].name, "a"); + EXPECT_EQ(schema[1].children[0].children[0].local_id, 0); + EXPECT_EQ(schema[1].children[0].children[1].name, "b"); + EXPECT_EQ(schema[1].children[0].children[1].local_id, 1); + + EXPECT_EQ(schema[2].name, "kv"); + EXPECT_EQ(schema[2].local_id, 9); + ASSERT_EQ(schema[2].children.size(), 2); + EXPECT_EQ(schema[2].children[0].name, "key"); + EXPECT_EQ(schema[2].children[0].local_id, 0); + EXPECT_EQ(schema[2].children[1].name, "value"); + EXPECT_EQ(schema[2].children[1].local_id, 1); + ASSERT_EQ(schema[2].children[1].children.size(), 2); + EXPECT_EQ(schema[2].children[1].children[0].name, "a"); + EXPECT_EQ(schema[2].children[1].children[1].name, "b"); +} + +// Scenario: CSV v2 honors FileScanRequest local positions, so TableReader can request a subset of +// CSV fields in an order different from the physical CSV field order. +TEST_F(CsvV2ReaderTest, ReadsRequestedColumnsInFileLocalBlockOrder) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)), + LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(0), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1, 0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice"); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "bob"); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 1), 2); +} + +// Scenario: CSV v2 defaults to the same strict UTF-8 validation as the old query reader. Invalid +// bytes should fail fast unless the scan params explicitly disable text UTF-8 validation. +TEST_F(CsvV2ReaderTest, InvalidUtf8FailsWhenValidationEnabled) { + const auto invalid_path = (_test_dir / "invalid_utf8.csv").string(); + std::ofstream output(invalid_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,"; + output.write("\xff", 1); + output << ",10\n"; + output.close(); + + auto reader = create_reader(invalid_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + const auto status = reader->get_block(&block, &rows, &eof); + EXPECT_FALSE(status.ok()); + EXPECT_TRUE(status.to_string().find("Only support csv data in utf8 codec") != std::string::npos) + << status; +} + +// Scenario: external CSV scans can opt out of UTF-8 validation through +// `enable_text_validate_utf8=false`. In that mode the reader preserves the original bytes instead +// of rejecting the row. +TEST_F(CsvV2ReaderTest, DisableTextValidateUtf8ReadsRawBytes) { + const auto invalid_path = (_test_dir / "invalid_utf8_disabled.csv").string(); + std::ofstream output(invalid_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,"; + output.write("\xff", 1); + output << ",10\n"; + output.close(); + + _params.file_attributes.__set_enable_text_validate_utf8(false); + auto reader = create_reader(invalid_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), std::string("\xff", 1)); +} + +// Scenario: file TVF can keep the logical CSV format as FORMAT_CSV_PLAIN and put the actual gzip +// compression on the scan range. CSV v2 must honor that range-level compression before validating +// UTF-8; otherwise the gzip bytes are misread as CSV text. +TEST_F(CsvV2ReaderTest, RangeCompressTypeGzipDecompressesPlainCsvFormat) { + const auto gz_path = (_test_dir / "reader.csv.gz").string(); + static constexpr unsigned char gzipped_csv[] = { + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcb, 0x4c, + 0xd1, 0xc9, 0x4b, 0xcc, 0x4d, 0xd5, 0x29, 0x4e, 0xce, 0x2f, 0x4a, 0xe5, + 0x32, 0xd4, 0x49, 0xcc, 0xc9, 0x4c, 0x4e, 0xd5, 0x31, 0x34, 0xe0, 0x02, + 0x00, 0x0b, 0xed, 0x5c, 0xa2, 0x19, 0x00, 0x00, 0x00}; + std::ofstream output(gz_path, std::ios::binary); + output.write(reinterpret_cast(gzipped_csv), sizeof(gzipped_csv)); + output.close(); + + _params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN); + _params.__isset.compress_type = false; + auto reader = create_reader(gz_path, &_params, _slots, &_state, &_profile, 0, -1, + TFileCompressType::GZ); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(1), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0, 1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "alice"); +} + +// Scenario: FE column_idxs define the CSV field ordinal for each physical file slot. The mapping +// can be non-identity when FE reorders projected file slots, so the reader must use the local id +// from FileScanRequest instead of the slot vector position. +TEST_F(CsvV2ReaderTest, ColumnIdxsMapSlotsToCsvOrdinals) { + const auto remap_path = (_test_dir / "remapped.csv").string(); + std::ofstream output(remap_path, std::ios::binary); + output << "name,score,id\n"; + output << "alice,10,1\n"; + output.close(); + + _params.__set_column_idxs({2, 0, 1}); + auto reader = create_reader(remap_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + EXPECT_EQ(schema[0].name, "id"); + EXPECT_EQ(schema[0].local_id, 2); + EXPECT_EQ(schema[1].name, "name"); + EXPECT_EQ(schema[1].local_id, 0); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2)), + LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(2), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(0), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {2, 0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "alice"); +} + +// Scenario: CSV stores one complex column as one text field, so v2 must read the whole struct +// field before evaluating a file-local predicate on one child. This covers `SELECT s.a WHERE +// s.b > 10` style scans after CsvReader's MaterializedColumnMapper has requested the full +// top-level `s`. +TEST_F(CsvV2ReaderTest, FullStructColumnSupportsChildConjunctFiltering) { + const auto complex_path = (_test_dir / "complex.csv").string(); + std::ofstream output(complex_path, std::ios::binary); + output << "id|s|score\n"; + output << "1|{\"a\": 11, \"b\": 5}|10\n"; + output << "2|{\"a\": 22, \"b\": 20}|20\n"; + output.close(); + + _params.file_attributes.text_params.__set_column_separator("|"); + _params.__set_column_idxs({0, 1, 2}); + auto slots = build_struct_slots(&_pool); + auto reader = create_reader(complex_path, &_params, slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + request->conjuncts = {prepared_conjunct( + &_state, std::make_shared( + /*block_position=*/0, /*child_index=*/1, /*value=*/10))}; + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 0, 0), 22); + EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 1, 0), 20); +} + +// Scenario: a table-level scan can need only partition/default columns, leaving the CSV +// FileScanRequest with no file-local columns. The reader must still report the number of rows read. +TEST_F(CsvV2ReaderTest, EmptyFileLocalProjectionStillReportsRows) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + Block block; + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_EQ(rows, 2); + EXPECT_FALSE(eof); +} + +// Scenario: stream-load/http_stream inputs do not have a known split size or file size. A first +// split must still read until EOF instead of rejecting the request before opening the stream. +TEST_F(CsvV2ReaderTest, UnknownFirstSplitSizeReadsUntilEof) { + auto reader = create_unknown_size_reader(_file_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(1), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0, 1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 1), "bob"); +} + +// Scenario: stream load/http_stream CSV input is not backed by a filesystem. If TableReader fails +// to preserve the stream load id, the v2 reader should report that directly instead of calling the +// generic FileFactory path and returning "unsupported file reader type: 2". +TEST_F(CsvV2ReaderTest, StreamInputRequiresLoadIdBeforeOpeningPipe) { + _params.__set_file_type(TFileType::FILE_STREAM); + auto reader = create_unknown_size_reader(_file_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + const auto status = reader->open(request); + + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("stream reader requires load id"), std::string::npos) + << status; +} + +// Scenario: CSV has no footer row count, so v2 COUNT pushdown scans the split and returns the +// counted row count through FileAggregateResult. +TEST_F(CsvV2ReaderTest, CountAggregateScansRows) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::type::COUNT; + FileAggregateResult aggregate_result; + ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok()); + EXPECT_EQ(aggregate_result.count, 2); +} + +// Scenario: CSV v2 parses enclosed fields itself instead of delegating to the old CsvReader. A +// separator inside an enclosed string must stay inside the same CSV field. +TEST_F(CsvV2ReaderTest, EnclosedFieldKeepsSeparatorInsideStringValue) { + const auto quoted_path = (_test_dir / "quoted.csv").string(); + std::ofstream output(quoted_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,\"alice,team\",10\n"; + output.close(); + + _params.file_attributes.text_params.__set_enclose('"'); + _params.file_attributes.text_params.__set_escape('\\'); + auto reader = create_reader(quoted_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice,team"); +} + +// Scenario: when the CSV row has fewer fields than the FE-provided file slot list, v2 fills the +// missing requested field with NULL instead of failing or shifting later columns. +TEST_F(CsvV2ReaderTest, MissingRequestedFieldUsesNullFormat) { + const auto missing_path = (_test_dir / "missing.csv").string(); + std::ofstream output(missing_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,alice\n"; + output.close(); + + auto reader = create_reader(missing_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(2), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0)); +} + +// Scenario: the first line may contain UTF-8 BOM and CSV_WITH_NAMES_AND_TYPES has two header +// records. Both must be skipped before materializing the first data row. +TEST_F(CsvV2ReaderTest, HeaderNamesAndTypesSkipsTwoLinesAndBom) { + const auto header_path = (_test_dir / "header_names_types.csv").string(); + std::ofstream output(header_path, std::ios::binary); + output.write("\xEF\xBB\xBF", 3); + output << "id,name,score\n"; + output << "INT,STRING,INT\n"; + output << "7,carol,70\n"; + output.close(); + + _params.file_attributes.__set_header_type(BeConsts::CSV_WITH_NAMES_AND_TYPES); + auto reader = create_reader(header_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 7); +} + +// Scenario: when the first returned data line starts with UTF-8 BOM, CSV v2 strips the BOM before +// passing the cell to the serde. This matters for headerless files whose first column is numeric. +TEST_F(CsvV2ReaderTest, BomIsRemovedFromFirstDataLineWithoutHeader) { + const auto bom_path = (_test_dir / "bom_data.csv").string(); + std::ofstream output(bom_path, std::ios::binary); + output.write("\xEF\xBB\xBF", 3); + output << "5,bom,50\n"; + output.close(); + + _params.file_attributes.__isset.header_type = false; + auto reader = create_reader(bom_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 5); +} + +// Scenario: when FE does not set header_type, CSV v2 must honor skip_lines exactly as the old +// reader does. +TEST_F(CsvV2ReaderTest, SkipLinesUsedWhenHeaderTypeUnset) { + const auto skip_path = (_test_dir / "skip_lines.csv").string(); + std::ofstream output(skip_path, std::ios::binary); + output << "skip me\n"; + output << "skip me too\n"; + output << "3,dan,30\n"; + output.close(); + + _params.file_attributes.__isset.header_type = false; + _params.file_attributes.__set_skip_lines(2); + auto reader = create_reader(skip_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 3); +} + +// Scenario: empty physical lines are skipped by default, but read_csv_empty_line_as_null turns one +// empty line into one all-null logical row. +TEST_F(CsvV2ReaderTest, EmptyLineAsNullWhenQueryOptionEnabled) { + const auto empty_line_path = (_test_dir / "empty_line.csv").string(); + std::ofstream output(empty_line_path, std::ios::binary); + output << "id,name,score\n"; + output << "\n"; + output << "4,erin,40\n"; + output.close(); + + _state._query_options.__set_read_csv_empty_line_as_null(true); + auto reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 1), 4); +} + +// Scenario: FE-provided CSV text parameters define NULL semantics. Explicit null_format and +// empty_field_as_null should both produce nullable values without throwing serde errors. +TEST_F(CsvV2ReaderTest, NullFormatAndEmptyFieldAsNullProduceNullableValues) { + const auto null_path = (_test_dir / "null_format.csv").string(); + std::ofstream output(null_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,NULL,\n"; + output.close(); + + _params.file_attributes.text_params.__set_null_format("NULL"); + _params.file_attributes.text_params.__set_empty_field_as_null(true); + auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)), + LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(2), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1, 2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_TRUE(is_null_at(*block.get_by_position(1).column, 0)); +} + +// Scenario: OpenCSV keeps an empty field as an empty string when empty_field_as_null is false, +// even if FE passes an empty null_format. This differs from Hive text serde, where an empty +// serialization.null.format is a real NULL marker. +TEST_F(CsvV2ReaderTest, EmptyNullFormatKeepsCsvEmptyFieldAsEmptyString) { + const auto null_path = (_test_dir / "empty_null_format.csv").string(); + std::ofstream output(null_path, std::ios::binary); + output << "id,name,score\n"; + output << "1,alice,10\n"; + output << "2,,20\n"; + output << "3,NULL,30\n"; + output.close(); + + _params.file_attributes.text_params.__set_null_format(""); + _params.file_attributes.text_params.__set_empty_field_as_null(false); + auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 3); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 1)); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), ""); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 2)); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 2), "NULL"); +} + +// Scenario: a non-first split starts inside a record. CSV v2 pre-reads enough delimiter bytes and +// skips the partial first line so the split begins at the next complete row. +TEST_F(CsvV2ReaderTest, NonFirstSplitSkipsPartialFirstRecord) { + const auto split_path = (_test_dir / "split.csv").string(); + std::ofstream output(split_path, std::ios::binary); + output << "1,skip,10\n"; + output << "2,bob,20\n"; + output.close(); + + _params.file_attributes.__isset.header_type = false; + auto reader = create_reader(split_path, &_params, _slots, &_state, &_profile, + /*range_start_offset=*/3); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2); +} + +// Scenario: compressed CSV cannot be split at arbitrary byte offsets because the decompressor needs +// the stream from the beginning. V2 should reject such a split before constructing the line reader. +TEST_F(CsvV2ReaderTest, NonFirstCompressedSplitReturnsError) { + _params.__set_format_type(TFileFormatType::FORMAT_CSV_GZ); + _params.file_attributes.__isset.header_type = false; + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile, + /*range_start_offset=*/1); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + EXPECT_FALSE(reader->open(request).ok()); +} + +// Scenario: FileScanRequest is a TableReader-to-FileReader contract. Unknown CSV ordinals, +// out-of-range block positions, and sparse block-position maps must fail during reader open. +TEST_F(CsvV2ReaderTest, InvalidScanRequestReturnsError) { + { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(99))}; + request->local_positions.emplace(LocalColumnId(99), LocalIndex(0)); + EXPECT_FALSE(reader->open(request).ok()); + } + { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(2)); + EXPECT_FALSE(reader->open(request).ok()); + } + { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + EXPECT_FALSE(reader->open(request).ok()); + } +} + +// Scenario: CSV v2 can count rows by scanning, but it cannot answer min/max or mixed aggregate +// requests from metadata. +TEST_F(CsvV2ReaderTest, UnsupportedAggregateReturnsNotSupported) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::type::MINMAX; + FileAggregateResult aggregate_result; + EXPECT_FALSE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok()); +} + +} // namespace +} // namespace doris::format::csv diff --git a/be/test/format_v2/delimited_text/text_reader_test.cpp b/be/test/format_v2/delimited_text/text_reader_test.cpp new file mode 100644 index 00000000000000..b6402cab5d86d6 --- /dev/null +++ b/be/test/format_v2/delimited_text/text_reader_test.cpp @@ -0,0 +1,965 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/delimited_text/text_reader.h" + +#include + +#include +#include +#include +#include + +#include "common/consts.h" +#include "common/object_pool.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "format_v2/column_mapper.h" +#include "io/io_common.h" +#include "runtime/runtime_profile.h" +#include "testutil/desc_tbl_builder.h" +#include "testutil/mock/mock_runtime_state.h" + +namespace doris::format::text { +namespace { + +TFileScanRangeParams text_scan_params() { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_TEXT); + params.__set_file_type(TFileType::FILE_LOCAL); + TFileAttributes attributes; + TFileTextScanRangeParams text_params; + text_params.__set_column_separator(","); + text_params.__set_line_delimiter("\n"); + text_params.__set_escape('\\'); + attributes.__set_text_params(std::move(text_params)); + params.__set_file_attributes(std::move(attributes)); + params.__set_column_idxs({0, 1, 2}); + return params; +} + +std::unique_ptr file_description(const std::string& path, + int64_t range_start_offset = 0, + int64_t range_size = -1) { + auto desc = std::make_unique(); + desc->path = path; + desc->range_start_offset = range_start_offset; + desc->range_size = range_size; + desc->file_size = static_cast(std::filesystem::file_size(path)); + return desc; +} + +std::vector build_slots(ObjectPool* pool) { + DescriptorTblBuilder builder(pool); + builder.declare_tuple() + << TupleDescBuilder::SlotType {make_nullable(std::make_shared()), "id"} + << TupleDescBuilder::SlotType {make_nullable(std::make_shared()), + "name"} + << TupleDescBuilder::SlotType {make_nullable(std::make_shared()), + "score"}; + auto* desc_tbl = builder.build(); + return desc_tbl->get_tuple_descriptor(0)->slots(); +} + +SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type, + const std::string& name) { + TSlotDescriptor slot_desc; + slot_desc.__set_id(slot_id); + slot_desc.__set_parent(0); + slot_desc.__set_slotType(type->to_thrift()); + slot_desc.__set_columnPos(slot_idx); + slot_desc.__set_byteOffset(0); + slot_desc.__set_nullIndicatorByte(slot_idx / 8); + slot_desc.__set_nullIndicatorBit(slot_idx % 8); + slot_desc.__set_slotIdx(slot_idx); + slot_desc.__set_isMaterialized(true); + slot_desc.__set_colName(name); + return pool->add(new SlotDescriptor(slot_desc)); +} + +std::vector build_struct_slots(ObjectPool* pool) { + const auto nullable_int = make_nullable(std::make_shared()); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {nullable_int, nullable_int}, Strings {"a", "b"})); + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, struct_type, "s"), + make_test_slot(pool, 2, 2, make_nullable(std::make_shared()), "score")}; +} + +std::vector build_nested_complex_slots(ObjectPool* pool) { + const auto nullable_int = make_nullable(std::make_shared()); + const auto nullable_string = make_nullable(std::make_shared()); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {nullable_int, nullable_string}, Strings {"a", "b"})); + const auto array_type = make_nullable(std::make_shared(struct_type)); + const auto map_type = + make_nullable(std::make_shared(nullable_string, struct_type)); + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, array_type, "xs"), + make_test_slot(pool, 2, 2, map_type, "kv")}; +} + +std::vector build_char_varchar_slots(ObjectPool* pool) { + const auto nullable_char3 = + make_nullable(std::make_shared(3, PrimitiveType::TYPE_CHAR)); + const auto nullable_varchar4 = + make_nullable(std::make_shared(4, PrimitiveType::TYPE_VARCHAR)); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {nullable_char3, nullable_varchar4}, Strings {"city", "country"})); + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, nullable_char3, "city"), + make_test_slot(pool, 2, 2, struct_type, "region")}; +} + +std::unique_ptr create_reader(const std::string& path, TFileScanRangeParams* params, + const std::vector& slots, + MockRuntimeState* state, RuntimeProfile* profile, + int64_t range_start_offset = 0, int64_t range_size = -1, + std::shared_ptr io_ctx = nullptr) { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(path, range_start_offset, range_size); + auto reader = std::make_unique(system_properties, desc, std::move(io_ctx), profile, + params, slots); + EXPECT_TRUE(reader->init(state).ok()); + return reader; +} + +Block make_block(const std::vector& schema, + const std::vector& local_ids) { + Block block; + for (const auto local_id : local_ids) { + const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) { + return column.local_id == local_id; + }); + EXPECT_TRUE(it != schema.end()); + block.insert({it->type->create_column(), it->type, it->name}); + } + return block; +} + +std::string nullable_string_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data_at(row).to_string(); +} + +int32_t nullable_int_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data()[row]; +} + +bool is_null_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + return nullable.is_null_at(row); +} + +int32_t nullable_struct_int_child_at(const IColumn& column, size_t child_index, size_t row) { + const auto& nullable = assert_cast(column); + const auto& struct_column = assert_cast(nullable.get_nested_column()); + const auto& child_nullable = + assert_cast(struct_column.get_column(child_index)); + const auto& nested = assert_cast(child_nullable.get_nested_column()); + return nested.get_data()[row]; +} + +int64_t counter_value(RuntimeProfile* profile, const std::string& name) { + auto* counter = profile->get_counter(name); + EXPECT_NE(counter, nullptr) << name; + return counter == nullptr ? 0 : counter->value(); +} + +class NullableIntGreaterThanExpr final : public VExpr { +public: + NullableIntGreaterThanExpr(size_t block_position, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& data = assert_cast(nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = + !nullable.is_null_at(source_row) && data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, _value); + return Status::OK(); + } + +private: + size_t _block_position; + int32_t _value; + const std::string _name = "NullableIntGreaterThanExpr"; +}; + +class StructIntChildGreaterThanExpr final : public VExpr { +public: + StructIntChildGreaterThanExpr(size_t block_position, size_t child_index, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _child_index(child_index), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& struct_column = assert_cast(nullable.get_nested_column()); + const auto& child_nullable = + assert_cast(struct_column.get_column(_child_index)); + const auto& child_data = + assert_cast(child_nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& data = result->get_data(); + data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + data[row] = !nullable.is_null_at(source_row) && + !child_nullable.is_null_at(source_row) && + child_data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, + _child_index, _value); + return Status::OK(); + } + +private: + size_t _block_position; + size_t _child_index; + int32_t _value; + const std::string _name = "StructIntChildGreaterThanExpr"; +}; + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto context = VExprContext::create_shared(expr); + auto status = context->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = context->open(state); + EXPECT_TRUE(status.ok()) << status; + return context; +} + +class TextV2ReaderTest : public testing::Test { +public: + void SetUp() override { + _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_text_reader_test"; + std::filesystem::remove_all(_test_dir); + std::filesystem::create_directories(_test_dir); + _file_path = (_test_dir / "reader.text").string(); + std::ofstream output(_file_path, std::ios::binary); + output << "1,alice,10\n"; + output << "2,bob,20\n"; + output.close(); + _slots = build_slots(&_pool); + _params = text_scan_params(); + } + + void TearDown() override { std::filesystem::remove_all(_test_dir); } + +protected: + ObjectPool _pool; + MockRuntimeState _state; + RuntimeProfile _profile {"text_v2_reader_test"}; + std::filesystem::path _test_dir; + std::string _file_path; + std::vector _slots; + TFileScanRangeParams _params; +}; + +// Scenario: Text v2 exposes FE-provided file slots as nullable file-local schema using column_idxs +// as Hive text field ordinals. +TEST_F(TextV2ReaderTest, SchemaUsesSlotTypesAndColumnIdxs) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + EXPECT_EQ(schema[0].name, "id"); + EXPECT_EQ(schema[0].local_id, 0); + EXPECT_TRUE(schema[0].type->is_nullable()); + EXPECT_EQ(schema[1].name, "name"); + EXPECT_EQ(schema[1].local_id, 1); + EXPECT_TRUE(schema[1].type->is_nullable()); +} + +// Scenario: FE slot types for Hive text are table target types. CHAR/VARCHAR length is not stored +// in the text file, so the file schema must expose bounded strings as unbounded STRING. Otherwise +// TableReader believes the file value already satisfies the table length and skips truncation. +TEST_F(TextV2ReaderTest, SchemaTreatsCharVarcharSlotsAsUnboundedFileStrings) { + auto slots = build_char_varchar_slots(&_pool); + auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + + const auto city_type = remove_nullable(schema[1].type); + EXPECT_EQ(city_type->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(assert_cast(city_type.get())->len(), -1); + + const auto region_type = remove_nullable(schema[2].type); + ASSERT_EQ(region_type->get_primitive_type(), TYPE_STRUCT); + const auto* region_struct = assert_cast(region_type.get()); + ASSERT_EQ(region_struct->get_elements().size(), 2); + EXPECT_EQ(remove_nullable(region_struct->get_element(0))->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(remove_nullable(region_struct->get_element(1))->get_primitive_type(), TYPE_STRING); + ASSERT_EQ(schema[2].children.size(), 2); + EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING); +} + +// Scenario: Hive text is row-oriented and cannot lazy-read predicate columns separately. The +// reader declares that capability by choosing MaterializedColumnMapper itself. +TEST_F(TextV2ReaderTest, CreatesMaterializedColumnMapper) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto mapper = reader->create_column_mapper({.mode = TableColumnMappingMode::BY_NAME}); + + ASSERT_NE(dynamic_cast(mapper.get()), nullptr); +} + +// Scenario: Text v2 exposes delimited-text profile counters for read, parse, deserialize, and +// file-local conjunct filtering, so scanner profiles can explain where row-reader time is spent. +TEST_F(TextV2ReaderTest, ProfileCountersTrackReadParseDeserializeAndFilter) { + const auto profile_path = (_test_dir / "profile.text").string(); + std::ofstream output(profile_path, std::ios::binary); + output << "\n"; + output << "1,alice,10\n"; + output << "2,bob,20\n"; + output.close(); + + _state._query_options.__set_read_csv_empty_line_as_null(true); + auto io_ctx = std::make_shared(); + auto reader = create_reader(profile_path, &_params, _slots, &_state, &_profile, 0, -1, io_ctx); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(2), LocalIndex(1)); + request->conjuncts = { + prepared_conjunct(&_state, std::make_shared(1, 15))}; + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0, 2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2); + + EXPECT_NE(_profile.get_counter("OpenFileTime"), nullptr); + EXPECT_NE(_profile.get_counter("CreateLineReaderTime"), nullptr); + EXPECT_NE(_profile.get_counter("ReadLineTime"), nullptr); + EXPECT_NE(_profile.get_counter("SplitLineTime"), nullptr); + EXPECT_NE(_profile.get_counter("DeserializeTime"), nullptr); + EXPECT_NE(_profile.get_counter("ConjunctFilterTime"), nullptr); + EXPECT_NE(_profile.get_counter("DeleteConjunctFilterTime"), nullptr); + EXPECT_EQ(counter_value(&_profile, "RawLinesRead"), 3); + EXPECT_EQ(counter_value(&_profile, "RowsReadBeforeFilter"), 3); + EXPECT_EQ(counter_value(&_profile, "RowsFilteredByConjunct"), 2); + EXPECT_EQ(io_ctx->predicate_filtered_rows, 2); + EXPECT_EQ(counter_value(&_profile, "RowsFilteredByDeleteConjunct"), 0); + EXPECT_EQ(counter_value(&_profile, "RowsReturned"), 1); + EXPECT_EQ(counter_value(&_profile, "EmptyLinesRead"), 1); + EXPECT_EQ(counter_value(&_profile, "SkippedLines"), 0); + EXPECT_EQ(counter_value(&_profile, "CellsDeserialized"), 6); +} + +// Scenario: Hive text has no embedded nested schema, but TableColumnMapper still needs semantic +// children for complex table columns. The reader synthesizes ARRAY/MAP/STRUCT children from the +// slot type while keeping the top-level local id as the text field ordinal from column_idxs. +TEST_F(TextV2ReaderTest, SchemaSynthesizesComplexChildrenForColumnMapper) { + _params.__set_column_idxs({4, 7, 9}); + auto slots = build_nested_complex_slots(&_pool); + auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + + EXPECT_EQ(schema[1].name, "xs"); + EXPECT_EQ(schema[1].local_id, 7); + ASSERT_EQ(schema[1].children.size(), 1); + EXPECT_EQ(schema[1].children[0].name, "element"); + EXPECT_EQ(schema[1].children[0].local_id, 0); + ASSERT_EQ(schema[1].children[0].children.size(), 2); + EXPECT_EQ(schema[1].children[0].children[0].name, "a"); + EXPECT_EQ(schema[1].children[0].children[0].local_id, 0); + EXPECT_EQ(schema[1].children[0].children[1].name, "b"); + EXPECT_EQ(schema[1].children[0].children[1].local_id, 1); + + EXPECT_EQ(schema[2].name, "kv"); + EXPECT_EQ(schema[2].local_id, 9); + ASSERT_EQ(schema[2].children.size(), 2); + EXPECT_EQ(schema[2].children[0].name, "key"); + EXPECT_EQ(schema[2].children[0].local_id, 0); + EXPECT_EQ(schema[2].children[1].name, "value"); + EXPECT_EQ(schema[2].children[1].local_id, 1); + ASSERT_EQ(schema[2].children[1].children.size(), 2); + EXPECT_EQ(schema[2].children[1].children[0].name, "a"); + EXPECT_EQ(schema[2].children[1].children[1].name, "b"); +} + +// Scenario: Hive text escapes a field separator inside a string. The splitter keeps the escaped +// separator in the same field, and hive-text serde unescapes the final string value. +TEST_F(TextV2ReaderTest, EscapedSeparatorStaysInsideStringField) { + const auto escaped_path = (_test_dir / "escaped.text").string(); + std::ofstream output(escaped_path, std::ios::binary); + output << "1,alice\\,team,10\n"; + output.close(); + + auto reader = create_reader(escaped_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)), + LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(2), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1, 2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice,team"); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 10); +} + +// Scenario: Hive text supports multi-character field separators. V2 must not split on partial +// matches and must still honor FileScanRequest output positions. +TEST_F(TextV2ReaderTest, MultiCharacterSeparatorReadsRequestedColumns) { + const auto multi_path = (_test_dir / "multi.text").string(); + std::ofstream output(multi_path, std::ios::binary); + output << "3||carol||30\n"; + output.close(); + + _params.file_attributes.text_params.__set_column_separator("||"); + auto reader = create_reader(multi_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)), + LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(0), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1, 0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "carol"); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 3); +} + +// Scenario: column_idxs can map table slots to non-identity Hive text field ordinals. +TEST_F(TextV2ReaderTest, ColumnIdxsMapSlotsToTextOrdinals) { + const auto remap_path = (_test_dir / "remapped.text").string(); + std::ofstream output(remap_path, std::ios::binary); + output << "doris,40,4\n"; + output.close(); + + _params.__set_column_idxs({2, 0, 1}); + auto reader = create_reader(remap_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + EXPECT_EQ(schema[0].local_id, 2); + EXPECT_EQ(schema[1].local_id, 0); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2)), + LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(2), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(0), LocalIndex(1)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {2, 0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 4); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "doris"); +} + +// Scenario: Hive text complex values are encoded inside one top-level text field. V2 reads the +// complete struct field first, then evaluates a file-local predicate on one child, covering +// `SELECT s.a WHERE s.b > 10` without pretending that Text has physical nested-column pruning. +TEST_F(TextV2ReaderTest, FullStructColumnSupportsChildConjunctFiltering) { + const auto complex_path = (_test_dir / "complex.text").string(); + std::ofstream output(complex_path, std::ios::binary); + output << "1|11,5|10\n"; + output << "2|22,20|20\n"; + output.close(); + + _params.file_attributes.text_params.__set_column_separator("|"); + _params.file_attributes.text_params.__set_collection_delimiter(","); + _params.__set_column_idxs({0, 1, 2}); + auto slots = build_struct_slots(&_pool); + auto reader = create_reader(complex_path, &_params, slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + request->conjuncts = {prepared_conjunct( + &_state, std::make_shared( + /*block_position=*/0, /*child_index=*/1, /*value=*/10))}; + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 0, 0), 22); + EXPECT_EQ(nullable_struct_int_child_at(*block.get_by_position(0).column, 1, 0), 20); +} + +// Scenario: missing Hive text fields are materialized as NULL rather than shifting later columns. +TEST_F(TextV2ReaderTest, MissingRequestedFieldUsesNullFormat) { + const auto missing_path = (_test_dir / "missing.text").string(); + std::ofstream output(missing_path, std::ios::binary); + output << "1,alice\n"; + output.close(); + + auto reader = create_reader(missing_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(2), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0)); +} + +// Scenario: Text v2 can scan a request with no materialized columns. This is used by table-level +// COUNT-style paths where the reader must still return the number of logical rows read. +TEST_F(TextV2ReaderTest, EmptyFileLocalProjectionStillReportsRows) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + Block block; + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_EQ(rows, 2); + EXPECT_FALSE(eof); +} + +// Scenario: stream load/http_stream text input is not backed by a filesystem. If TableReader fails +// to preserve the stream load id, the v2 reader should report that directly instead of calling the +// generic FileFactory path and returning "unsupported file reader type: 2". +TEST_F(TextV2ReaderTest, StreamInputRequiresLoadIdBeforeOpeningPipe) { + _params.__set_file_type(TFileType::FILE_STREAM); + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + const auto status = reader->open(request); + + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("stream reader requires load id"), std::string::npos) + << status; +} + +// Scenario: explicit text null_format is honored by Hive-text serde. Unlike CSV +// empty_field_as_null, an empty text field is not NULL unless it equals null_format exactly. +TEST_F(TextV2ReaderTest, NullFormatProducesNullableValue) { + const auto null_path = (_test_dir / "null_format.text").string(); + std::ofstream output(null_path, std::ios::binary); + output << "1,NULL,10\n"; + output << "2,,20\n"; + output.close(); + + _params.file_attributes.text_params.__set_null_format("NULL"); + auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 1)); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), ""); +} + +// Scenario: Hive SerDe can define the empty string itself as NULL. The nullable string fast path +// must match the generic nullable serde behavior instead of treating empty null_format as +// "null format is not configured". +TEST_F(TextV2ReaderTest, EmptyNullFormatProducesNullableValue) { + const auto null_path = (_test_dir / "empty_null_format.text").string(); + std::ofstream output(null_path, std::ios::binary); + output << "1,alice,10\n"; + output << "2,,20\n"; + output << "3,NULL,30\n"; + output.close(); + + _params.file_attributes.text_params.__set_null_format(""); + auto reader = create_reader(null_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 3); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 1)); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 2)); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 2), "NULL"); +} + +// Scenario: TEXT_WITH_NAMES_AND_TYPES-style headers share the delimited text base skip path with +// CSV. Both header records must be skipped before the first data row is read. +TEST_F(TextV2ReaderTest, HeaderNamesAndTypesSkipsTwoLines) { + const auto header_path = (_test_dir / "header_names_types.text").string(); + std::ofstream output(header_path, std::ios::binary); + output << "id,name,score\n"; + output << "INT,STRING,INT\n"; + output << "7,carol,70\n"; + output.close(); + + _params.file_attributes.__set_header_type(BeConsts::CSV_WITH_NAMES_AND_TYPES); + auto reader = create_reader(header_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 7); +} + +// Scenario: the shared delimited text base removes UTF-8 BOM from the first returned data line. +// This matters for headerless text files whose first column is numeric. +TEST_F(TextV2ReaderTest, BomIsRemovedFromFirstDataLineWithoutHeader) { + const auto bom_path = (_test_dir / "bom_data.text").string(); + std::ofstream output(bom_path, std::ios::binary); + output.write("\xEF\xBB\xBF", 3); + output << "5,bom,50\n"; + output.close(); + + auto reader = create_reader(bom_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 5); +} + +// Scenario: when FE does not set header_type, skip_lines should be honored by the shared +// delimited text base before TextReader starts splitting rows. +TEST_F(TextV2ReaderTest, SkipLinesUsedWhenHeaderTypeUnset) { + const auto skip_path = (_test_dir / "skip_lines.text").string(); + std::ofstream output(skip_path, std::ios::binary); + output << "skip me\n"; + output << "skip me too\n"; + output << "3,dan,30\n"; + output.close(); + + _params.file_attributes.__isset.header_type = false; + _params.file_attributes.__set_skip_lines(2); + auto reader = create_reader(skip_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 3); +} + +// Scenario: Hive TEXTFILE treats an empty physical line as a record. For the first field it +// deserializes an empty value; missing trailing fields are filled with null_format. +TEST_F(TextV2ReaderTest, EmptyLineAsRecordByDefault) { + const auto empty_line_path = (_test_dir / "empty_line.text").string(); + std::ofstream output(empty_line_path, std::ios::binary); + output << "\n"; + output << "4,erin,40\n"; + output.close(); + + auto reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(1)), + LocalColumnIndex::top_level(LocalColumnId(2))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(1), LocalIndex(1)); + request->local_positions.emplace(LocalColumnId(2), LocalIndex(2)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0, 1, 2}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_TRUE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_TRUE(is_null_at(*block.get_by_position(1).column, 0)); + EXPECT_TRUE(is_null_at(*block.get_by_position(2).column, 0)); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 1), 4); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 1), "erin"); + EXPECT_EQ(nullable_int_at(*block.get_by_position(2).column, 1), 40); +} + +// Scenario: for a single-column Hive TEXTFILE table, an empty physical line is one empty string +// field rather than a skipped row. +TEST_F(TextV2ReaderTest, EmptyLineAsSingleEmptyStringField) { + const auto empty_line_path = (_test_dir / "empty_line_single_string.text").string(); + std::ofstream output(empty_line_path, std::ios::binary); + output << "\n"; + output << "erin\n"; + output.close(); + + _params.__set_column_idxs({0}); + const std::vector slots {make_test_slot( + &_pool, 0, 0, make_nullable(std::make_shared()), "value")}; + auto reader = create_reader(empty_line_path, &_params, slots, &_state, &_profile); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_FALSE(is_null_at(*block.get_by_position(0).column, 0)); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), ""); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "erin"); +} + +// Scenario: text v2 COUNT pushdown counts empty physical lines as Hive TEXTFILE records. +TEST_F(TextV2ReaderTest, CountAggregatePreservesEmptyLines) { + const auto empty_line_path = (_test_dir / "empty_line_count.text").string(); + std::ofstream output(empty_line_path, std::ios::binary); + output << "\n"; + output << "4,erin,40\n"; + output.close(); + + auto reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::type::COUNT; + FileAggregateResult aggregate_result; + ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok()); + EXPECT_EQ(aggregate_result.count, 2); +} + +// Scenario: Text v2 COUNT pushdown scans rows because text files do not expose row-count metadata. +TEST_F(TextV2ReaderTest, CountAggregateScansRows) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::type::COUNT; + FileAggregateResult aggregate_result; + ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok()); + EXPECT_EQ(aggregate_result.count, 2); +} + +// Scenario: a non-first split starts inside a text record and must skip the partial first line. +TEST_F(TextV2ReaderTest, NonFirstSplitSkipsPartialFirstRecord) { + const auto split_path = (_test_dir / "split.text").string(); + std::ofstream output(split_path, std::ios::binary); + output << "1,skip,10\n"; + output << "2,bob,20\n"; + output.close(); + + auto reader = create_reader(split_path, &_params, _slots, &_state, &_profile, + /*range_start_offset=*/3); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 2); +} + +// Scenario: compressed text cannot be split at arbitrary byte offsets because the decompressor +// needs the stream from the beginning. V2 should reject such a split before constructing the line +// reader. +TEST_F(TextV2ReaderTest, NonFirstCompressedSplitReturnsError) { + _params.__set_compress_type(TFileCompressType::GZ); + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile, + /*range_start_offset=*/1); + + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + EXPECT_FALSE(reader->open(request).ok()); +} + +// Scenario: FileScanRequest is a TableReader-to-FileReader contract. Unknown TEXT ordinals, +// out-of-range block positions, and sparse block-position maps must fail during reader open. +TEST_F(TextV2ReaderTest, InvalidScanRequestReturnsError) { + { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(99))}; + request->local_positions.emplace(LocalColumnId(99), LocalIndex(0)); + EXPECT_FALSE(reader->open(request).ok()); + } + { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(2)); + EXPECT_FALSE(reader->open(request).ok()); + } + { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)), + LocalColumnIndex::top_level(LocalColumnId(1))}; + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(1), LocalIndex(0)); + EXPECT_FALSE(reader->open(request).ok()); + } +} + +// Scenario: unsupported aggregate requests must fail explicitly instead of returning partial +// results from the scan path. +TEST_F(TextV2ReaderTest, UnsupportedAggregateReturnsNotSupported) { + auto reader = create_reader(_file_path, &_params, _slots, &_state, &_profile); + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::type::MINMAX; + FileAggregateResult aggregate_result; + EXPECT_FALSE(reader->get_aggregate_result(aggregate_request, &aggregate_result).ok()); +} + +} // namespace +} // namespace doris::format::text diff --git a/be/test/format_v2/expr/cast_test.cpp b/be/test/format_v2/expr/cast_test.cpp new file mode 100644 index 00000000000000..341b89433f0c08 --- /dev/null +++ b/be/test/format_v2/expr/cast_test.cpp @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/expr/cast.h" + +#include + +#include +#include +#include + +#include "common/status.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/field.h" +#include "exprs/vexpr_context.h" +#include "exprs/vliteral.h" +#include "exprs/vslot_ref.h" +#include "runtime/descriptors.h" +#include "testutil/column_helper.h" +#include "testutil/mock/mock_runtime_state.h" + +namespace doris::format { + +class CastTest : public testing::Test { +protected: + void SetUp() override { state.set_enable_strict_cast(true); } + + static VExprContextSPtr create_context(const DataTypePtr& return_type, + const DataTypePtr& child_type, int child_column_id = 0) { + auto cast = Cast::create_shared(return_type); + cast->add_child(VSlotRef::create_shared(child_column_id, child_column_id, -1, child_type, + "source_column")); + return VExprContext::create_shared(cast); + } + + Status prepare_open_execute(VExprContext* context, Block* block, int* result_column_id) { + RETURN_IF_ERROR(context->prepare(&state, RowDescriptor())); + RETURN_IF_ERROR(context->open(&state)); + return context->execute(block, result_column_id); + } + + MockRuntimeState state; +}; + +TEST_F(CastTest, CastIntSlotToBigInt) { + auto source_type = std::make_shared(); + auto return_type = std::make_shared(); + auto context = create_context(return_type, source_type); + Block block; + block.insert(ColumnHelper::create_column_with_name({1, -2, 3})); + + int result_column_id = -1; + auto status = prepare_open_execute(context.get(), &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + ASSERT_EQ(result_column_id, 1); + ASSERT_EQ(block.columns(), 2); + EXPECT_EQ(block.get_by_position(result_column_id).type, return_type); + const auto& result_column = + assert_cast(*block.get_by_position(result_column_id).column); + EXPECT_EQ(result_column.get_data()[0], 1); + EXPECT_EQ(result_column.get_data()[1], -2); + EXPECT_EQ(result_column.get_data()[2], 3); + + context->close(); +} + +TEST_F(CastTest, CastStringSlotToNullableInt) { + state.set_enable_strict_cast(false); + auto source_type = std::make_shared(); + auto return_type = std::make_shared(std::make_shared()); + auto context = create_context(return_type, source_type); + Block block; + block.insert(ColumnHelper::create_column_with_name({"10", "bad", "-3"})); + + int result_column_id = -1; + auto status = prepare_open_execute(context.get(), &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + const auto& nullable_column = + assert_cast(*block.get_by_position(result_column_id).column); + const auto& result_column = + assert_cast(nullable_column.get_nested_column()); + const auto& null_map = nullable_column.get_null_map_data(); + EXPECT_EQ(result_column.get_data()[0], 10); + EXPECT_EQ(result_column.get_data()[2], -3); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 1); + EXPECT_EQ(null_map[2], 0); + + context->close(); +} + +TEST_F(CastTest, CastLiteralToString) { + auto source_type = std::make_shared(); + auto return_type = std::make_shared(); + auto cast = Cast::create_shared(return_type); + cast->add_child(VLiteral::create_shared(source_type, Field::create_field(123))); + auto context = VExprContext::create_shared(cast); + Block block; + block.insert(ColumnHelper::create_column_with_name({1, 2, 3})); + + int result_column_id = -1; + auto status = prepare_open_execute(context.get(), &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + const auto& result = block.get_by_position(result_column_id); + EXPECT_EQ(result.type->to_string(*result.column, 0), "123"); + EXPECT_EQ(result.type->to_string(*result.column, 1), "123"); + EXPECT_EQ(result.type->to_string(*result.column, 2), "123"); + + context->close(); +} + +TEST_F(CastTest, EmptyBlockAppendsEmptyResultColumn) { + auto source_type = std::make_shared(); + auto return_type = std::make_shared(); + auto context = create_context(return_type, source_type); + Block block; + block.insert(ColumnHelper::create_column_with_name({})); + + int result_column_id = -1; + auto status = prepare_open_execute(context.get(), &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + ASSERT_EQ(result_column_id, 1); + EXPECT_EQ(block.get_by_position(result_column_id).column->size(), 0); + + context->close(); +} + +TEST_F(CastTest, PrepareRejectsMissingChild) { + auto cast = Cast::create_shared(std::make_shared()); + VExprContext context(cast); + + auto status = context.prepare(&state, RowDescriptor()); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos); +} + +TEST_F(CastTest, PrepareRejectsMultipleChildren) { + auto child_type = std::make_shared(); + auto cast = Cast::create_shared(std::make_shared()); + cast->add_child(VSlotRef::create_shared(0, 0, -1, child_type, "c0")); + cast->add_child(VSlotRef::create_shared(1, 1, -1, child_type, "c1")); + VExprContext context(cast); + + auto status = context.prepare(&state, RowDescriptor()); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos); +} + +} // namespace doris::format diff --git a/be/test/format_v2/expr/delete_predicate_test.cpp b/be/test/format_v2/expr/delete_predicate_test.cpp new file mode 100644 index 00000000000000..264a9fdf9b19f5 --- /dev/null +++ b/be/test/format_v2/expr/delete_predicate_test.cpp @@ -0,0 +1,168 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/expr/delete_predicate.h" + +#include + +#include +#include +#include + +#include "common/status.h" +#include "core/block/block.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "exprs/vexpr_context.h" +#include "runtime/descriptors.h" +#include "testutil/mock/mock_slot_ref.h" + +namespace doris::format { + +class DeletePredicateTest : public testing::Test { +protected: + static Block make_block(const std::vector& row_ids) { + auto column = ColumnInt64::create(); + for (auto row_id : row_ids) { + column->insert_value(row_id); + } + + Block block; + block.insert({std::move(column), std::make_shared(), "row_id"}); + return block; + } + + static std::vector result_column_data(const Block& block, int result_column_id) { + const auto& result_column = + assert_cast(*block.get_by_position(result_column_id).column); + return {result_column.get_data().begin(), result_column.get_data().end()}; + } + + static Status execute_delete_predicate(const std::vector& deleted_rows, Block* block, + int* result_column_id) { + auto delete_predicate = std::make_shared(deleted_rows); + delete_predicate->_open_finished = true; + delete_predicate->add_child( + std::make_shared(0, std::make_shared())); + + VExprContext context(delete_predicate); + return delete_predicate->execute(&context, block, result_column_id); + } +}; + +TEST_F(DeletePredicateTest, MatchDeletedRowsInInputRange) { + const std::vector deleted_rows {-3, 1, 4, 8, 12, 20}; + auto block = make_block({0, 1, 2, 3, 4, 5, 8, 12}); + + int result_column_id = -1; + auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + EXPECT_EQ(result_column_id, 1); + EXPECT_EQ(result_column_data(block, result_column_id), + std::vector({0, 1, 0, 0, 1, 0, 1, 1})); +} + +TEST_F(DeletePredicateTest, EmptyDeletedRowsReturnAllFalse) { + const std::vector deleted_rows; + auto block = make_block({1, 2, 3}); + + int result_column_id = -1; + auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + EXPECT_EQ(result_column_data(block, result_column_id), std::vector({0, 0, 0})); +} + +TEST_F(DeletePredicateTest, DeletedRowsOutsideInputRangeReturnAllFalse) { + const std::vector deleted_rows {-10, -1, 10, 11}; + auto block = make_block({1, 2, 3}); + + int result_column_id = -1; + auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + EXPECT_EQ(result_column_data(block, result_column_id), std::vector({0, 0, 0})); +} + +TEST_F(DeletePredicateTest, EmptyRowIdColumnAppendsEmptyResultColumn) { + const std::vector deleted_rows {1, 2, 3}; + auto block = make_block({}); + + int result_column_id = -1; + auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id); + ASSERT_TRUE(status.ok()) << status; + + EXPECT_EQ(block.columns(), 2); + EXPECT_EQ(result_column_id, 1); + EXPECT_EQ(result_column_data(block, result_column_id), std::vector({})); +} + +TEST_F(DeletePredicateTest, MissingRowIdColumnReturnsError) { + const std::vector deleted_rows {1, 2, 3}; + Block block; + + int result_column_id = -1; + auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("invalid column id"), std::string::npos); + EXPECT_EQ(block.columns(), 0); + EXPECT_EQ(result_column_id, -1); +} + +TEST_F(DeletePredicateTest, MissingRowIdChildReturnsError) { + const std::vector deleted_rows {1}; + auto block = make_block({1}); + auto delete_predicate = std::make_shared(deleted_rows); + delete_predicate->_open_finished = true; + VExprContext context(delete_predicate); + + int result_column_id = -1; + auto status = delete_predicate->execute(&context, &block, &result_column_id); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos); +} + +TEST_F(DeletePredicateTest, ExecuteColumnImplReturnsError) { + const std::vector deleted_rows {1}; + DeletePredicate delete_predicate(deleted_rows); + VExprContext context(std::make_shared(deleted_rows)); + ColumnPtr result_column; + + auto status = + delete_predicate.execute_column_impl(&context, nullptr, nullptr, 0, result_column); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("DeletePredicate::execute_column_impl"), std::string::npos); +} + +TEST_F(DeletePredicateTest, LifecycleAndDebugString) { + const std::vector deleted_rows {1}; + DeletePredicate delete_predicate(deleted_rows); + VExprContext context(std::make_shared(deleted_rows)); + RowDescriptor row_desc; + + auto status = delete_predicate.prepare(nullptr, row_desc, &context); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(delete_predicate.expr_name(), "DeletePredicate"); + EXPECT_EQ(delete_predicate.debug_string(), "DeletePredicate"); + + status = delete_predicate.open(nullptr, &context, FunctionContext::THREAD_LOCAL); + ASSERT_TRUE(status.ok()) << status; + delete_predicate.close(&context, FunctionContext::THREAD_LOCAL); +} + +} // namespace doris::format diff --git a/be/test/format_v2/expr/equality_delete_predicate_test.cpp b/be/test/format_v2/expr/equality_delete_predicate_test.cpp new file mode 100644 index 00000000000000..886a86713fe8da --- /dev/null +++ b/be/test/format_v2/expr/equality_delete_predicate_test.cpp @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/expr/equality_delete_predicate.h" + +#include + +#include +#include +#include +#include + +#include "common/status.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "exprs/vexpr_context.h" +#include "format_v2/expr/cast.h" +#include "runtime/descriptors.h" +#include "testutil/column_helper.h" +#include "testutil/mock/mock_runtime_state.h" +#include "testutil/mock/mock_slot_ref.h" + +namespace doris::format { + +class EqualityDeletePredicateTest : public testing::Test { +protected: + static ColumnWithTypeAndName make_nullable_int_column( + const std::string& name, const std::vector>& values) { + auto data = ColumnInt32::create(); + auto null_map = ColumnUInt8::create(); + for (const auto& value : values) { + data->insert_value(value.value_or(0)); + null_map->insert_value(!value.has_value()); + } + auto type = make_nullable(std::make_shared()); + return {ColumnNullable::create(std::move(data), std::move(null_map)), type, name}; + } + + static ColumnWithTypeAndName make_nullable_string_column( + const std::string& name, const std::vector>& values) { + auto data = ColumnString::create(); + auto null_map = ColumnUInt8::create(); + for (const auto& value : values) { + const std::string data_value = value.value_or(""); + data->insert_data(data_value.data(), data_value.size()); + null_map->insert_value(!value.has_value()); + } + auto type = make_nullable(std::make_shared()); + return {ColumnNullable::create(std::move(data), std::move(null_map)), type, name}; + } + + static std::vector result_column_data(const Block& block, int result_column_id) { + const auto& result_column = + assert_cast(*block.get_by_position(result_column_id).column); + return {result_column.get_data().begin(), result_column.get_data().end()}; + } + + static Status execute_equality_delete_predicate(Block delete_block, std::vector field_ids, + Block* data_block, int* result_column_id) { + auto predicate = + std::make_shared(std::move(delete_block), field_ids); + predicate->_open_finished = true; + for (size_t idx = 0; idx < field_ids.size(); ++idx) { + predicate->add_child( + std::make_shared(idx, data_block->get_by_position(idx).type)); + } + + VExprContext context(predicate); + return predicate->execute(&context, data_block, result_column_id); + } + + static Status execute_prepared_equality_delete_predicate(const VExprContextSPtr& context, + MockRuntimeState* state, + Block* data_block, + int* result_column_id) { + RETURN_IF_ERROR(context->prepare(state, RowDescriptor())); + RETURN_IF_ERROR(context->open(state)); + return context->execute(data_block, result_column_id); + } +}; + +TEST_F(EqualityDeletePredicateTest, MatchSingleColumn) { + Block delete_block; + delete_block.insert(make_nullable_int_column("id", {1, 4})); + Block data_block; + data_block.insert(make_nullable_int_column("id", {1, 2, 3, 4})); + + int result_column_id = -1; + auto status = execute_equality_delete_predicate(std::move(delete_block), {1}, &data_block, + &result_column_id); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector({1, 0, 0, 1})); +} + +TEST_F(EqualityDeletePredicateTest, MatchMultipleColumns) { + Block delete_block; + delete_block.insert(make_nullable_int_column("id", {1, 2})); + delete_block.insert(make_nullable_string_column("name", {"a", "b"})); + Block data_block; + data_block.insert(make_nullable_int_column("id", {1, 1, 2, 2})); + data_block.insert(make_nullable_string_column("name", {"a", "b", "a", "b"})); + + int result_column_id = -1; + auto status = execute_equality_delete_predicate(std::move(delete_block), {1, 2}, &data_block, + &result_column_id); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector({1, 0, 0, 1})); +} + +TEST_F(EqualityDeletePredicateTest, MatchNullValues) { + Block delete_block; + delete_block.insert(make_nullable_int_column("id", {std::nullopt})); + Block data_block; + data_block.insert(make_nullable_int_column("id", {1, std::nullopt, 3})); + + int result_column_id = -1; + auto status = execute_equality_delete_predicate(std::move(delete_block), {1}, &data_block, + &result_column_id); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector({0, 1, 0})); +} + +TEST_F(EqualityDeletePredicateTest, MatchAfterCastToDeleteKeyType) { + Block delete_block; + delete_block.insert(make_nullable_int_column("id", {1, 4})); + Block data_block; + data_block.insert(ColumnHelper::create_column_with_name({1, 2, 4})); + + auto predicate = std::make_shared(std::move(delete_block), + std::vector {1}); + auto cast_expr = Cast::create_shared(make_nullable(std::make_shared())); + cast_expr->add_child(std::make_shared(0, data_block.get_by_position(0).type)); + predicate->add_child(std::move(cast_expr)); + auto context = VExprContext::create_shared(predicate); + MockRuntimeState state; + + int result_column_id = -1; + auto status = execute_prepared_equality_delete_predicate(context, &state, &data_block, + &result_column_id); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector({1, 0, 1})); + context->close(); +} + +TEST_F(EqualityDeletePredicateTest, ChildCountMismatchReturnsError) { + Block delete_block; + delete_block.insert(make_nullable_int_column("id", {1})); + auto predicate = std::make_shared(std::move(delete_block), + std::vector {1}); + predicate->_open_finished = true; + Block data_block; + data_block.insert(make_nullable_int_column("id", {1})); + VExprContext context(predicate); + + int result_column_id = -1; + auto status = predicate->execute(&context, &data_block, &result_column_id); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("should have 1 child exprs"), std::string::npos); +} + +} // namespace doris::format diff --git a/be/test/format_v2/json/json_reader_test.cpp b/be/test/format_v2/json/json_reader_test.cpp new file mode 100644 index 00000000000000..31c77501ce67c6 --- /dev/null +++ b/be/test/format_v2/json/json_reader_test.cpp @@ -0,0 +1,608 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/json/json_reader.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "common/object_pool.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "format_v2/column_data.h" +#include "io/io_common.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_profile.h" +#include "testutil/mock/mock_runtime_state.h" + +namespace doris::format::json { +namespace { + +TFileScanRangeParams json_scan_params(bool read_json_by_line = true, bool strip_outer_array = false, + std::string jsonpaths = "", std::string json_root = "", + bool ignore_malformed = false) { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_JSON); + params.__set_file_type(TFileType::FILE_LOCAL); + params.__set_compress_type(TFileCompressType::PLAIN); + TFileAttributes attributes; + TFileTextScanRangeParams text_params; + text_params.__set_line_delimiter("\n"); + attributes.__set_text_params(std::move(text_params)); + attributes.__set_read_json_by_line(read_json_by_line); + attributes.__set_strip_outer_array(strip_outer_array); + attributes.__set_num_as_string(false); + attributes.__set_fuzzy_parse(false); + if (!jsonpaths.empty()) { + attributes.__set_jsonpaths(std::move(jsonpaths)); + } + if (!json_root.empty()) { + attributes.__set_json_root(std::move(json_root)); + } + if (ignore_malformed) { + attributes.__set_openx_json_ignore_malformed(true); + } + params.__set_file_attributes(std::move(attributes)); + return params; +} + +SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type, + const std::string& name) { + TSlotDescriptor slot_desc; + slot_desc.__set_id(slot_id); + slot_desc.__set_parent(0); + slot_desc.__set_slotType(type->to_thrift()); + slot_desc.__set_columnPos(slot_idx); + slot_desc.__set_byteOffset(0); + if (type->is_nullable()) { + slot_desc.__set_nullIndicatorByte(slot_idx / 8); + slot_desc.__set_nullIndicatorBit(slot_idx % 8); + } else { + slot_desc.__set_nullIndicatorByte(0); + slot_desc.__set_nullIndicatorBit(-1); + } + slot_desc.__set_slotIdx(slot_idx); + slot_desc.__set_isMaterialized(true); + slot_desc.__set_colName(name); + return pool->add(new SlotDescriptor(slot_desc)); +} + +std::vector build_slots(ObjectPool* pool) { + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, make_nullable(std::make_shared()), "name")}; +} + +std::vector build_slots_with_required_name(ObjectPool* pool) { + return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared()), "id"), + make_test_slot(pool, 1, 1, std::make_shared(), "name")}; +} + +std::vector build_complex_slots(ObjectPool* pool) { + auto varchar_type = make_nullable(std::make_shared(8, TYPE_VARCHAR)); + auto array_type = make_nullable( + std::make_shared(make_nullable(std::make_shared()))); + auto map_type = make_nullable(std::make_shared( + std::make_shared(4, TYPE_CHAR), + make_nullable(std::make_shared(16, TYPE_VARCHAR)))); + auto struct_type = make_nullable(std::make_shared( + DataTypes {std::make_shared(8, TYPE_VARCHAR), + make_nullable(std::make_shared( + make_nullable(std::make_shared())))}, + Strings {"name", "scores"})); + return {make_test_slot(pool, 0, 0, varchar_type, "nickname"), + make_test_slot(pool, 1, 1, array_type, "tags"), + make_test_slot(pool, 2, 2, map_type, "props"), + make_test_slot(pool, 3, 3, struct_type, "profile")}; +} + +std::unique_ptr file_description(const std::string& path) { + auto desc = std::make_unique(); + desc->path = path; + desc->file_size = static_cast(std::filesystem::file_size(path)); + desc->range_start_offset = 0; + desc->range_size = desc->file_size; + return desc; +} + +std::filesystem::path write_json_file(const std::string& name, const std::string& content) { + const auto test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_json_reader"; + std::filesystem::create_directories(test_dir); + const auto file_path = test_dir / name; + std::ofstream out(file_path); + out << content; + return file_path; +} + +TFileRangeDesc file_range(const std::filesystem::path& file_path) { + TFileRangeDesc range; + range.__set_path(file_path.string()); + range.__set_start_offset(0); + range.__set_size(static_cast(std::filesystem::file_size(file_path))); + range.__set_file_size(static_cast(std::filesystem::file_size(file_path))); + return range; +} + +Block make_block(const std::vector& schema, + const std::vector& local_ids) { + Block block; + for (const auto local_id : local_ids) { + const auto it = std::ranges::find_if( + schema, [&](const auto& column) { return column.local_id == local_id; }); + EXPECT_TRUE(it != schema.end()); + block.insert({it->type->create_column(), it->type, it->name}); + } + return block; +} + +struct ReadResult { + Status status; + Status second_status = Status::OK(); + Block block; + size_t rows = 0; + bool eof = false; + size_t second_rows = 0; + bool second_eof = false; + std::vector schema; +}; + +ReadResult read_once(const std::string& file_name, const std::string& content, + TFileScanRangeParams params, const std::vector& slots, + const std::vector& requested_local_ids, bool read_twice = false) { + const auto file_path = write_json_file(file_name, content); + auto range = file_range(file_path); + + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(file_path.string()); + RuntimeProfile profile("json_v2_reader_test"); + MockRuntimeState state; + JsonReader reader(system_properties, desc, nullptr, &profile, ¶ms, range, slots); + + ReadResult result; + result.status = reader.init(&state); + if (!result.status.ok()) { + return result; + } + result.status = reader.get_schema(&result.schema); + if (!result.status.ok()) { + return result; + } + + auto request = std::make_shared(); + for (size_t i = 0; i < requested_local_ids.size(); ++i) { + request->local_positions.emplace(LocalColumnId(requested_local_ids[i]), LocalIndex(i)); + } + result.status = reader.open(request); + if (!result.status.ok()) { + return result; + } + + result.block = make_block(result.schema, requested_local_ids); + result.status = reader.get_block(&result.block, &result.rows, &result.eof); + if (result.status.ok() && read_twice) { + auto eof_block = make_block(result.schema, requested_local_ids); + result.second_status = + reader.get_block(&eof_block, &result.second_rows, &result.second_eof); + } + return result; +} + +std::string nullable_string_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data_at(row).to_string(); +} + +std::string string_at(const IColumn& column, size_t row) { + const auto& nested = assert_cast(column); + return nested.get_data_at(row).to_string(); +} + +int32_t nullable_int_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data()[row]; +} + +bool nullable_is_null_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + return nullable.is_null_at(row); +} + +class NullableIntGreaterThanExpr final : public VExpr { +public: + NullableIntGreaterThanExpr(size_t block_position, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& data = assert_cast(nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = + !nullable.is_null_at(source_row) && data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, _value); + return Status::OK(); + } + +private: + size_t _block_position; + int32_t _value; + const std::string _name = "NullableIntGreaterThanExpr"; +}; + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto context = VExprContext::create_shared(expr); + auto status = context->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = context->open(state); + EXPECT_TRUE(status.ok()) << status; + return context; +} + +} // namespace + +TEST(JsonReaderTest, ReadsRequestedColumnsInFileScanRequestOrder) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("order.jsonl", + R"({"id":1,"name":"alice"})" + "\n" + R"({"id":2,"name":"bob"})" + "\n", + json_scan_params(), slots, {1, 0}, true); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.schema.size(), 2); + EXPECT_EQ(result.schema[0].name, "id"); + EXPECT_EQ(result.schema[0].local_id, 0); + EXPECT_EQ(result.schema[1].name, "name"); + EXPECT_EQ(result.schema[1].local_id, 1); + ASSERT_EQ(result.rows, 2); + ASSERT_EQ(result.block.columns(), 2); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 0), "alice"); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 1), "bob"); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 0), 1); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 1), 2); + ASSERT_TRUE(result.second_status.ok()) << result.second_status.to_string(); + EXPECT_EQ(result.second_rows, 0); + EXPECT_TRUE(result.second_eof); +} + +TEST(JsonReaderTest, ReadsSingleDocumentOuterArray) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = + read_once("outer_array.json", R"([{"id":3,"name":"carol"},{"id":4,"name":"dave"}])", + json_scan_params(false, true), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 2); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 3); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "carol"); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 4); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "dave"); +} + +TEST(JsonReaderTest, ReadsJsonRootByLine) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("json_root.jsonl", + R"({"payload":{"id":5,"name":"eve"}})" + "\n" + R"({"payload":{"id":6,"name":"frank"}})" + "\n", + json_scan_params(true, false, "", "$.payload"), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 2); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 5); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "eve"); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 6); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "frank"); +} + +TEST(JsonReaderTest, ReadsJsonPathsBySourceSlotAndReturnsRequestedBlockOrder) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("jsonpaths.jsonl", + R"({"payload":{"id":7,"user":"grace"}})" + "\n" + R"({"payload":{"id":8,"user":"heidi"}})" + "\n", + json_scan_params(true, false, R"(["$.payload.id","$.payload.user"])"), + slots, {1, 0}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 2); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 0), "grace"); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 1), "heidi"); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 0), 7); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 1), 8); +} + +TEST(JsonReaderTest, ReadsJsonPathsFromSingleDocumentOuterArray) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once( + "outer_array_jsonpaths.json", + R"([{"payload":{"id":12,"user":"kate"}},{"payload":{"id":13,"user":"leo"}}])", + json_scan_params(false, true, R"(["$.payload.id","$.payload.user"])"), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 2); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 12); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "kate"); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 13); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "leo"); +} + +TEST(JsonReaderTest, FillsMissingNullableColumnWithNull) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("missing_nullable.jsonl", + R"({"id":9})" + "\n", + json_scan_params(), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 1); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 9); + EXPECT_TRUE(nullable_is_null_at(*result.block.get_by_position(1).column, 0)); +} + +TEST(JsonReaderTest, ReturnsErrorForMissingRequiredColumn) { + ObjectPool pool; + auto slots = build_slots_with_required_name(&pool); + auto result = read_once("missing_required.jsonl", + R"({"id":10})" + "\n", + json_scan_params(), slots, {0, 1}); + + EXPECT_FALSE(result.status.ok()); +} + +TEST(JsonReaderTest, ReadsPresentRequiredColumn) { + ObjectPool pool; + auto slots = build_slots_with_required_name(&pool); + auto result = read_once("present_required.jsonl", + R"({"id":14,"name":"mallory"})" + "\n", + json_scan_params(), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.schema.size(), 2); + EXPECT_TRUE(result.schema[0].type->is_nullable()); + EXPECT_FALSE(result.schema[1].type->is_nullable()); + ASSERT_EQ(result.rows, 1); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 14); + EXPECT_EQ(string_at(*result.block.get_by_position(1).column, 0), "mallory"); +} + +TEST(JsonReaderTest, SynthesizesComplexFileSchemaFromSlotTypes) { + ObjectPool pool; + auto slots = build_complex_slots(&pool); + const auto file_path = write_json_file("complex_schema.jsonl", "{}\n"); + auto params = json_scan_params(); + auto range = file_range(file_path); + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(file_path.string()); + RuntimeProfile profile("json_v2_reader_complex_schema_test"); + MockRuntimeState state; + JsonReader reader(system_properties, desc, nullptr, &profile, ¶ms, range, slots); + + ASSERT_TRUE(reader.init(&state).ok()); + std::vector schema; + ASSERT_TRUE(reader.get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 4); + + EXPECT_EQ(schema[0].name, "nickname"); + EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_STRING); + + ASSERT_EQ(schema[1].children.size(), 1); + EXPECT_EQ(schema[1].children[0].name, "element"); + EXPECT_EQ(schema[1].children[0].local_id, 0); + EXPECT_EQ(remove_nullable(schema[1].children[0].type)->get_primitive_type(), TYPE_INT); + + ASSERT_EQ(schema[2].children.size(), 2); + EXPECT_EQ(schema[2].children[0].name, "key"); + EXPECT_EQ(schema[2].children[1].name, "value"); + EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING); + EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING); + + ASSERT_EQ(schema[3].children.size(), 2); + EXPECT_EQ(schema[3].children[0].name, "name"); + EXPECT_EQ(schema[3].children[1].name, "scores"); + EXPECT_EQ(remove_nullable(schema[3].children[0].type)->get_primitive_type(), TYPE_STRING); + ASSERT_EQ(schema[3].children[1].children.size(), 1); + EXPECT_EQ(schema[3].children[1].children[0].name, "element"); + EXPECT_EQ(remove_nullable(schema[3].children[1].children[0].type)->get_primitive_type(), + TYPE_INT); +} + +TEST(JsonReaderTest, RejectsInvalidFileScanRequestsBeforeOpeningFile) { + ObjectPool pool; + auto slots = build_slots(&pool); + const auto file_path = write_json_file("invalid_request.jsonl", "{}\n"); + auto params = json_scan_params(); + auto range = file_range(file_path); + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(file_path.string()); + RuntimeProfile profile("json_v2_reader_invalid_request_test"); + MockRuntimeState state; + JsonReader reader(system_properties, desc, nullptr, &profile, ¶ms, range, slots); + ASSERT_TRUE(reader.init(&state).ok()); + + auto unknown_column_request = std::make_shared(); + unknown_column_request->local_positions.emplace(LocalColumnId(9), LocalIndex(0)); + auto status = reader.open(unknown_column_request); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("unknown local column id 9"), std::string::npos); + + auto invalid_position_request = std::make_shared(); + invalid_position_request->local_positions.emplace(LocalColumnId(0), LocalIndex(2)); + status = reader.open(invalid_position_request); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("invalid block position 2"), std::string::npos); + + auto missing_position_request = std::make_shared(); + missing_position_request->local_positions.emplace(LocalColumnId(0), LocalIndex(1)); + missing_position_request->local_positions.emplace(LocalColumnId(1), LocalIndex(1)); + status = reader.open(missing_position_request); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("misses block position 0"), std::string::npos); + + std::vector schema; + ASSERT_TRUE(reader.get_schema(&schema).ok()); + auto block = make_block(schema, {0}); + size_t rows = 0; + bool eof = false; + status = reader.get_block(&block, &rows, &eof); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("reader is not open"), std::string::npos); +} + +TEST(JsonReaderTest, ReturnsErrorForMalformedJsonByDefault) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("malformed_strict.jsonl", + "not-json\n" + R"({"id":11,"name":"judy"})" + "\n", + json_scan_params(), slots, {0, 1}); + + EXPECT_FALSE(result.status.ok()); +} + +TEST(JsonReaderTest, IgnoresMalformedJsonAsNullRowsWhenConfigured) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("ignore_malformed.jsonl", + "not-json\n" + R"({"id":11,"name":"judy"})" + "\n", + json_scan_params(true, false, "", "", true), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 2); + EXPECT_TRUE(nullable_is_null_at(*result.block.get_by_position(0).column, 0)); + EXPECT_TRUE(nullable_is_null_at(*result.block.get_by_position(1).column, 0)); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 11); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "judy"); +} + +TEST(JsonReaderTest, SkipsEmptyJsonLine) { + ObjectPool pool; + auto slots = build_slots(&pool); + auto result = read_once("empty_line.jsonl", + "\n" + R"({"id":15,"name":"nancy"})" + "\n", + json_scan_params(), slots, {0, 1}); + + ASSERT_TRUE(result.status.ok()) << result.status.to_string(); + ASSERT_EQ(result.rows, 1); + EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 15); + EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "nancy"); +} + +// Scenario: JSON, Native, CSV, and Hive text all share the same file-local filter order: +// delete conjuncts run first, ordinary conjuncts run second, and only ordinary conjuncts contribute +// to IOContext::predicate_filtered_rows. This guards the JSON caller of the shared helper because +// CSV/Text already assert the optional profile-counter path. +TEST(JsonReaderTest, AppliesDeleteAndNormalConjunctsWithPredicateFilterAccounting) { + ObjectPool pool; + auto slots = build_slots(&pool); + const auto file_path = write_json_file("filters.jsonl", R"({"id":1,"name":"alice"})" + "\n" + R"({"id":2,"name":"bob"})" + "\n" + R"({"id":3,"name":"carol"})" + "\n"); + auto params = json_scan_params(); + auto range = file_range(file_path); + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(file_path.string()); + RuntimeProfile profile("json_v2_reader_filter_test"); + MockRuntimeState state; + auto io_ctx = std::make_shared(); + JsonReader reader(system_properties, desc, io_ctx, &profile, ¶ms, range, slots); + + ASSERT_TRUE(reader.init(&state).ok()); + std::vector schema; + ASSERT_TRUE(reader.get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + request->local_positions.emplace(LocalColumnId(1), LocalIndex(1)); + request->delete_conjuncts = { + prepared_conjunct(&state, std::make_shared(0, 1))}; + request->conjuncts = { + prepared_conjunct(&state, std::make_shared(0, 2))}; + ASSERT_TRUE(reader.open(request).ok()); + + auto block = make_block(schema, {0, 1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader.get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 3); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "carol"); + EXPECT_EQ(io_ctx->predicate_filtered_rows, 1); +} + +} // namespace doris::format::json diff --git a/be/test/format_v2/native/native_reader_test.cpp b/be/test/format_v2/native/native_reader_test.cpp new file mode 100644 index 00000000000000..aaa7aa90e0681e --- /dev/null +++ b/be/test/format_v2/native/native_reader_test.cpp @@ -0,0 +1,419 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/native/native_reader.h" + +#include + +#include +#include +#include +#include +#include + +#include "agent/be_exec_version_manager.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "format/native/native_format.h" +#include "format_v2/column_mapper.h" +#include "io/fs/local_file_system.h" +#include "io/io_common.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_profile.h" +#include "runtime/runtime_state.h" +#include "util/coding.h" +#include "util/uid_util.h" + +namespace doris::format::native { +namespace { + +std::unique_ptr file_description(const std::string& path) { + auto desc = std::make_unique(); + desc->path = path; + desc->file_size = static_cast(std::filesystem::file_size(path)); + desc->range_start_offset = 0; + desc->range_size = desc->file_size; + return desc; +} + +Status write_file(const std::string& path, std::string_view content) { + io::FileWriterPtr writer; + RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &writer)); + if (!content.empty()) { + RETURN_IF_ERROR(writer->append({content.data(), content.size()})); + } + return writer->close(); +} + +std::unique_ptr create_reader(const std::string& path, RuntimeState* state, + RuntimeProfile* profile, + std::shared_ptr io_ctx = nullptr) { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto desc = file_description(path); + return std::make_unique(system_properties, desc, std::move(io_ctx), profile); +} + +Block make_source_block() { + auto id_column = ColumnInt32::create(); + id_column->insert_value(10); + id_column->insert_value(20); + + auto name_column = ColumnString::create(); + name_column->insert_data("alice", 5); + name_column->insert_data("bob", 3); + + Block block; + block.insert({id_column->get_ptr(), std::make_shared(), "id"}); + block.insert({name_column->get_ptr(), std::make_shared(), "name"}); + return block; +} + +Status write_native_file(const std::string& path, const Block& block) { + io::FileWriterPtr writer; + RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &writer)); + RETURN_IF_ERROR(writer->append({DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)})); + + uint8_t version_buffer[sizeof(uint32_t)]; + encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION); + RETURN_IF_ERROR(writer->append({version_buffer, sizeof(version_buffer)})); + + PBlock pblock; + size_t uncompressed_bytes = 0; + size_t compressed_bytes = 0; + int64_t compressed_time = 0; + RETURN_IF_ERROR(block.serialize(BeExecVersionManager::get_newest_version(), &pblock, + &uncompressed_bytes, &compressed_bytes, &compressed_time, + segment_v2::CompressionTypePB::SNAPPY)); + + const std::string payload = pblock.SerializeAsString(); + uint8_t len_buffer[sizeof(uint64_t)]; + encode_fixed64_le(len_buffer, payload.size()); + RETURN_IF_ERROR(writer->append({len_buffer, sizeof(len_buffer)})); + RETURN_IF_ERROR(writer->append(payload)); + return writer->close(); +} + +Block make_request_block(const std::vector& schema, + const std::vector& local_ids) { + Block block; + for (const auto local_id : local_ids) { + const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) { + return column.local_id == local_id; + }); + DORIS_CHECK(it != schema.end()); + block.insert({it->type->create_column(), it->type, it->name}); + } + return block; +} + +int32_t nullable_int_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data()[row]; +} + +std::string nullable_string_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data_at(row).to_string(); +} + +class NullableIntGreaterThanExpr final : public VExpr { +public: + NullableIntGreaterThanExpr(size_t block_position, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& data = assert_cast(nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = + !nullable.is_null_at(source_row) && data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, _value); + return Status::OK(); + } + +private: + size_t _block_position; + int32_t _value; + const std::string _name = "NullableIntGreaterThanExpr"; +}; + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto context = VExprContext::create_shared(expr); + auto status = context->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = context->open(state); + EXPECT_TRUE(status.ok()) << status; + return context; +} + +} // namespace + +TEST(NativeV2ReaderTest, SchemaProbeReplaysFirstBlockAndProjectsColumns) { + const auto path = "./log/native_v2_reader_" + UniqueId::gen_uid().to_string() + ".native"; + std::filesystem::create_directories("./log"); + ASSERT_TRUE(write_native_file(path, make_source_block()).ok()); + + RuntimeState state; + RuntimeProfile profile("native_v2_reader_test"); + auto reader = create_reader(path, &state, &profile); + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 2); + EXPECT_EQ(schema[0].name, "id"); + EXPECT_EQ(schema[0].local_id, 0); + EXPECT_EQ(schema[1].name, "name"); + EXPECT_EQ(schema[1].local_id, 1); + EXPECT_TRUE(schema[0].type->is_nullable()); + EXPECT_TRUE(schema[1].type->is_nullable()); + + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok()); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_request_block(schema, {1, 0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_FALSE(eof); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice"); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "bob"); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 10); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 1), 20); + + block.clear_column_data(2); + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_EQ(rows, 0); + EXPECT_TRUE(eof); + ASSERT_TRUE(reader->close().ok()); + static_cast(io::global_local_filesystem()->delete_file(path)); +} + +TEST(NativeV2ReaderTest, AppliesConjunctsAndTracksPredicateFilteredRows) { + const auto path = + "./log/native_v2_reader_filter_" + UniqueId::gen_uid().to_string() + ".native"; + std::filesystem::create_directories("./log"); + ASSERT_TRUE(write_native_file(path, make_source_block()).ok()); + + RuntimeState state; + RuntimeProfile profile("native_v2_reader_filter_test"); + auto io_ctx = std::make_shared(); + auto reader = create_reader(path, &state, &profile, io_ctx); + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + request->conjuncts = { + prepared_conjunct(&state, std::make_shared(0, 10))}; + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_request_block(schema, {0, 1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 20); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "bob"); + EXPECT_EQ(io_ctx->predicate_filtered_rows, 1); + ASSERT_TRUE(reader->close().ok()); + static_cast(io::global_local_filesystem()->delete_file(path)); +} + +TEST(NativeV2ReaderTest, RejectsInvalidHeaderAndEmptyFile) { + std::filesystem::create_directories("./log"); + RuntimeState state; + RuntimeProfile profile("native_v2_reader_bad_header_test"); + + const auto bad_magic_path = + "./log/native_v2_bad_magic_" + UniqueId::gen_uid().to_string() + ".native"; + std::string bad_magic(sizeof(DORIS_NATIVE_MAGIC) + sizeof(uint32_t), '\0'); + bad_magic.replace(0, 4, "BAD!"); + ASSERT_TRUE(write_file(bad_magic_path, bad_magic).ok()); + auto bad_magic_reader = create_reader(bad_magic_path, &state, &profile); + EXPECT_FALSE(bad_magic_reader->init(&state).ok()); + static_cast(io::global_local_filesystem()->delete_file(bad_magic_path)); + + const auto empty_path = "./log/native_v2_empty_" + UniqueId::gen_uid().to_string() + ".native"; + ASSERT_TRUE(write_file(empty_path, "").ok()); + auto empty_reader = create_reader(empty_path, &state, &profile); + EXPECT_FALSE(empty_reader->init(&state).ok()); + static_cast(io::global_local_filesystem()->delete_file(empty_path)); +} + +TEST(NativeV2ReaderTest, RejectsUnsupportedVersionAndHeaderOnlyFile) { + std::filesystem::create_directories("./log"); + RuntimeState state; + RuntimeProfile profile("native_v2_reader_header_boundary_test"); + + const auto bad_version_path = + "./log/native_v2_bad_version_" + UniqueId::gen_uid().to_string() + ".native"; + std::string bad_version; + bad_version.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)); + uint8_t version_buffer[sizeof(uint32_t)]; + encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION + 1); + bad_version.append(reinterpret_cast(version_buffer), sizeof(version_buffer)); + ASSERT_TRUE(write_file(bad_version_path, bad_version).ok()); + auto bad_version_reader = create_reader(bad_version_path, &state, &profile); + EXPECT_FALSE(bad_version_reader->init(&state).ok()); + static_cast(io::global_local_filesystem()->delete_file(bad_version_path)); + + const auto header_only_path = + "./log/native_v2_header_only_" + UniqueId::gen_uid().to_string() + ".native"; + std::string header_only; + header_only.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)); + encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION); + header_only.append(reinterpret_cast(version_buffer), sizeof(version_buffer)); + ASSERT_TRUE(write_file(header_only_path, header_only).ok()); + auto header_only_reader = create_reader(header_only_path, &state, &profile); + ASSERT_TRUE(header_only_reader->init(&state).ok()); + std::vector schema; + EXPECT_FALSE(header_only_reader->get_schema(&schema).ok()); + static_cast(io::global_local_filesystem()->delete_file(header_only_path)); +} + +TEST(NativeV2ReaderTest, RejectsTruncatedBlockDuringSchemaProbe) { + const auto path = "./log/native_v2_truncated_" + UniqueId::gen_uid().to_string() + ".native"; + std::filesystem::create_directories("./log"); + + std::string content; + content.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)); + uint8_t version_buffer[sizeof(uint32_t)]; + encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION); + content.append(reinterpret_cast(version_buffer), sizeof(version_buffer)); + uint8_t len_buffer[sizeof(uint64_t)]; + encode_fixed64_le(len_buffer, 8); + content.append(reinterpret_cast(len_buffer), sizeof(len_buffer)); + content.append("x"); + ASSERT_TRUE(write_file(path, content).ok()); + + RuntimeState state; + RuntimeProfile profile("native_v2_reader_truncated_test"); + auto reader = create_reader(path, &state, &profile); + ASSERT_TRUE(reader->init(&state).ok()); + std::vector schema; + EXPECT_FALSE(reader->get_schema(&schema).ok()); + static_cast(io::global_local_filesystem()->delete_file(path)); +} + +TEST(NativeV2ReaderTest, RejectsZeroLengthBlockAndInvalidPBlock) { + std::filesystem::create_directories("./log"); + RuntimeState state; + RuntimeProfile profile("native_v2_reader_bad_block_test"); + + auto build_header = [] { + std::string content; + content.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)); + uint8_t version_buffer[sizeof(uint32_t)]; + encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION); + content.append(reinterpret_cast(version_buffer), sizeof(version_buffer)); + return content; + }; + + const auto zero_len_path = + "./log/native_v2_zero_len_" + UniqueId::gen_uid().to_string() + ".native"; + auto zero_len_content = build_header(); + uint8_t len_buffer[sizeof(uint64_t)]; + encode_fixed64_le(len_buffer, 0); + zero_len_content.append(reinterpret_cast(len_buffer), sizeof(len_buffer)); + ASSERT_TRUE(write_file(zero_len_path, zero_len_content).ok()); + auto zero_len_reader = create_reader(zero_len_path, &state, &profile); + ASSERT_TRUE(zero_len_reader->init(&state).ok()); + std::vector schema; + EXPECT_FALSE(zero_len_reader->get_schema(&schema).ok()); + static_cast(io::global_local_filesystem()->delete_file(zero_len_path)); + + const auto invalid_pblock_path = + "./log/native_v2_invalid_pblock_" + UniqueId::gen_uid().to_string() + ".native"; + auto invalid_pblock_content = build_header(); + encode_fixed64_le(len_buffer, 1); + invalid_pblock_content.append(reinterpret_cast(len_buffer), sizeof(len_buffer)); + invalid_pblock_content.append("x"); + ASSERT_TRUE(write_file(invalid_pblock_path, invalid_pblock_content).ok()); + auto invalid_pblock_reader = create_reader(invalid_pblock_path, &state, &profile); + ASSERT_TRUE(invalid_pblock_reader->init(&state).ok()); + schema.clear(); + EXPECT_FALSE(invalid_pblock_reader->get_schema(&schema).ok()); + static_cast(io::global_local_filesystem()->delete_file(invalid_pblock_path)); +} + +TEST(NativeV2ReaderTest, RejectsUnknownRequestedLocalColumn) { + const auto path = + "./log/native_v2_unknown_column_" + UniqueId::gen_uid().to_string() + ".native"; + std::filesystem::create_directories("./log"); + ASSERT_TRUE(write_native_file(path, make_source_block()).ok()); + + RuntimeState state; + RuntimeProfile profile("native_v2_reader_unknown_column_test"); + auto reader = create_reader(path, &state, &profile); + ASSERT_TRUE(reader->init(&state).ok()); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(42)).ok()); + ASSERT_TRUE(reader->open(request).ok()); + Block block; + block.insert({schema[0].type->create_column(), schema[0].type, schema[0].name}); + size_t rows = 0; + bool eof = false; + EXPECT_FALSE(reader->get_block(&block, &rows, &eof).ok()); + static_cast(io::global_local_filesystem()->delete_file(path)); +} + +} // namespace doris::format::native diff --git a/be/test/format_v2/parquet/parquet_column_reader_test.cpp b/be/test/format_v2/parquet/parquet_column_reader_test.cpp new file mode 100644 index 00000000000000..91382203c5cea9 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_column_reader_test.cpp @@ -0,0 +1,3620 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_array.h" +#include "core/column/column_decimal.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_struct.h" +#include "core/types.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/parquet/selection_vector.h" + +namespace doris::format::parquet { +namespace { + +constexpr int64_t ROW_COUNT = 5; + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +template +const ColumnType& get_nullable_nested_column(const IColumn& column) { + // File-local schema exposed by the parquet reader follows Doris external-table semantics: + // nested STRUCT fields, LIST elements, and MAP keys/values are nullable even when the parquet + // field is required. + const auto& nullable_column = assert_cast(column); + return assert_cast(nullable_column.get_nested_column()); +} + +ParquetColumnSchema mock_column_schema() { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "mock"; + schema.type = std::make_shared(); + return schema; +} + +class BaseUnsupportedReader final : public ParquetColumnReader { +public: + BaseUnsupportedReader() + : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {} + + Status read(int64_t, MutableColumnPtr&, int64_t*) override { return Status::OK(); } +}; + +class DefaultSelectReader final : public ParquetColumnReader { +public: + DefaultSelectReader() : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {} + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override { + auto& values = assert_cast(*column); + for (int64_t row = 0; row < rows; ++row) { + values.insert_value(static_cast(_cursor + row)); + } + _cursor += rows; + *rows_read = rows; + _read_ranges.push_back(rows); + return Status::OK(); + } + + Status skip(int64_t rows) override { + _cursor += rows; + _skip_ranges.push_back(rows); + return Status::OK(); + } + + const std::vector& read_ranges() const { return _read_ranges; } + const std::vector& skip_ranges() const { return _skip_ranges; } + +private: + int64_t _cursor = 0; + std::vector _read_ranges; + std::vector _skip_ranges; +}; + +class NestedSkipReader final : public ParquetColumnReader { +public: + NestedSkipReader() : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {} + + Status read(int64_t, MutableColumnPtr&, int64_t*) override { return Status::OK(); } + + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override { + auto& values = assert_cast(*column); + for (int64_t row = 0; row < length_upper_bound; ++row) { + values.insert_value(static_cast(row)); + } + *values_read = length_upper_bound; + return Status::OK(); + } +}; + +class ParquetColumnReaderTest : public testing::Test { +protected: + void SetUp() override { + _test_dir = std::filesystem::temp_directory_path() / "doris_parquet_column_reader_test"; + std::filesystem::remove_all(_test_dir); + std::filesystem::create_directories(_test_dir); + _file_path = (_test_dir / "reader.parquet").string(); + write_parquet_file(); + _file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + auto metadata = _file_reader->metadata(); + ASSERT_EQ(metadata->num_row_groups(), 1); + _row_group = _file_reader->RowGroup(0); + ASSERT_NE(_row_group, nullptr); + auto schema_descriptor = _file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + auto st = build_parquet_column_schema(*schema_descriptor, &_fields); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(_fields.size(), _expected_by_field.size()); + } + + void TearDown() override { std::filesystem::remove_all(_test_dir); } + + template + std::shared_ptr build_required_array(const std::vector& values) { + Builder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int32_array() { + arrow::Int32Builder builder; + EXPECT_TRUE(builder.Append(1).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append(3).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append(5).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_all_null_int32_array() { + arrow::Int32Builder builder; + for (int64_t row = 0; row < ROW_COUNT; ++row) { + EXPECT_TRUE(builder.AppendNull().ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_required_struct_array() { + auto struct_type = arrow::struct_({arrow::field("a", arrow::int32(), false), + arrow::field("b", arrow::utf8(), false)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto b_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(b_array_builder))); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* a_builder = assert_cast(builder.field_builder(0)); + auto* b_builder = assert_cast(builder.field_builder(1)); + const std::vector a_values = {101, 102, 103, 104, 105}; + const std::vector b_values = {"sa", "sb", "sc", "sd", "se"}; + for (size_t row = 0; row < a_values.size(); ++row) { + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(a_values[row]).ok()); + EXPECT_TRUE(b_builder->Append(b_values[row]).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_nullable_struct_array() { + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto b_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(b_array_builder))); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* a_builder = assert_cast(builder.field_builder(0)); + auto* b_builder = assert_cast(builder.field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(201).ok()); + EXPECT_TRUE(b_builder->Append("nsa").ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(203).ok()); + EXPECT_TRUE(b_builder->AppendNull().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(204).ok()); + EXPECT_TRUE(b_builder->Append("nsd").ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_struct_with_decimal_array() { + auto decimal_type = arrow::decimal128(38, 6); + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("d", decimal_type, true)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto d_array_builder = std::make_unique( + decimal_type, arrow::default_memory_pool()); + field_builders.push_back(std::shared_ptr(std::move(d_array_builder))); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* a_builder = assert_cast(builder.field_builder(0)); + auto* d_builder = assert_cast(builder.field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(301).ok()); + EXPECT_TRUE(d_builder->Append(arrow::Decimal128(123456789)).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(303).ok()); + EXPECT_TRUE(d_builder->AppendNull().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(304).ok()); + EXPECT_TRUE(d_builder->Append(arrow::Decimal128(-987654321)).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_struct_with_list_array() { + auto list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("xs", list_type, true)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto value_builder = std::make_shared(); + auto list_builder = std::make_shared(arrow::default_memory_pool(), + value_builder, list_type); + field_builders.push_back(list_builder); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* a_builder = assert_cast(builder.field_builder(0)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(301).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(1).ok()); + EXPECT_TRUE(value_builder->Append(2).ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(303).ok()); + EXPECT_TRUE(list_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(304).ok()); + EXPECT_TRUE(list_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(305).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(value_builder->Append(5).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_struct_with_map_array() { + auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("kv", map_type, true)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto map_builder = std::make_shared( + arrow::default_memory_pool(), key_builder, value_builder, map_type); + field_builders.push_back(map_builder); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* a_builder = assert_cast(builder.field_builder(0)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(401).ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(1).ok()); + EXPECT_TRUE(value_builder->Append("one").ok()); + EXPECT_TRUE(key_builder->Append(2).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(403).ok()); + EXPECT_TRUE(map_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(404).ok()); + EXPECT_TRUE(map_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(a_builder->Append(405).ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(5).ok()); + EXPECT_TRUE(value_builder->Append("five").ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_struct_with_nested_struct_list_array() { + auto list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + auto nested_type = arrow::struct_({arrow::field("xs", list_type, true)}); + auto struct_type = arrow::struct_({arrow::field("nested", nested_type, true)}); + + auto value_builder = std::make_shared(); + auto list_builder = std::make_shared(arrow::default_memory_pool(), + value_builder, list_type); + std::vector> nested_field_builders; + nested_field_builders.push_back(list_builder); + auto nested_builder = std::make_shared( + nested_type, arrow::default_memory_pool(), std::move(nested_field_builders)); + std::vector> field_builders; + field_builders.push_back(nested_builder); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(nested_builder->Append().ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(7).ok()); + EXPECT_TRUE(value_builder->Append(8).ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(nested_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(nested_builder->Append().ok()); + EXPECT_TRUE(list_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(nested_builder->Append().ok()); + EXPECT_TRUE(list_builder->AppendEmptyValue().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_required_int_list_array() { + auto value_builder = std::make_shared(); + arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder, + arrow::list(arrow::field("element", arrow::int32(), false))); + const std::vector> values = { + {1, 2}, {3}, {4, 5, 6}, {7}, {8, 9}, + }; + for (const auto& row : values) { + EXPECT_TRUE(builder.Append().ok()); + for (const auto value : row) { + EXPECT_TRUE(value_builder->Append(value).ok()); + } + } + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int_list_array() { + auto value_builder = std::make_shared(); + arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder, + arrow::list(arrow::field("element", arrow::int32(), true))); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->Append(10).ok()); + EXPECT_TRUE(value_builder->Append(20).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(value_builder->Append(30).ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->Append(40).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_required_nullable_int_list_array() { + auto value_builder = std::make_shared(); + arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder, + arrow::list(arrow::field("element", arrow::int32(), true))); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(value_builder->Append(110).ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->Append(120).ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->Append(130).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(builder.Append().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_struct_list_array() { + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto b_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(b_array_builder))); + auto struct_builder = std::make_shared( + struct_type, arrow::default_memory_pool(), std::move(field_builders)); + arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder, + arrow::list(arrow::field("element", struct_type, true))); + auto* a_builder = assert_cast(struct_builder->field_builder(0)); + auto* b_builder = assert_cast(struct_builder->field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(11).ok()); + EXPECT_TRUE(b_builder->Append("la").ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(12).ok()); + EXPECT_TRUE(b_builder->AppendNull().ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->AppendNull().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(13).ok()); + EXPECT_TRUE(b_builder->Append("ld").ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(14).ok()); + EXPECT_TRUE(b_builder->Append("le").ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_list_list_int_array() { + auto value_builder = std::make_shared(); + auto inner_list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + auto inner_list_builder = std::make_shared( + arrow::default_memory_pool(), value_builder, inner_list_type); + arrow::ListBuilder builder(arrow::default_memory_pool(), inner_list_builder, + arrow::list(arrow::field("element", inner_list_type, true))); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(inner_list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(1).ok()); + EXPECT_TRUE(value_builder->Append(2).ok()); + EXPECT_TRUE(inner_list_builder->AppendEmptyValue().ok()); + EXPECT_TRUE(inner_list_builder->AppendNull().ok()); + EXPECT_TRUE(inner_list_builder->Append().ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(value_builder->Append(3).ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(inner_list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(4).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(inner_list_builder->AppendEmptyValue().ok()); + EXPECT_TRUE(inner_list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(5).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_required_int_string_map_array() { + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), false)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, + map_type); + const std::vector>> values = { + {{1, "a"}, {2, "b"}}, {{3, "c"}}, {{4, "d"}, {5, "e"}, {6, "f"}}, + {{7, "g"}}, {{8, "h"}, {9, "i"}}, + }; + for (const auto& row : values) { + EXPECT_TRUE(builder.Append().ok()); + for (const auto& [key, value] : row) { + EXPECT_TRUE(key_builder->Append(key).ok()); + EXPECT_TRUE(value_builder->Append(value).ok()); + } + } + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int_string_map_array() { + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, + map_type); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(10).ok()); + EXPECT_TRUE(value_builder->Append("aa").ok()); + EXPECT_TRUE(key_builder->Append(20).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(30).ok()); + EXPECT_TRUE(value_builder->Append("cc").ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(40).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_required_nullable_string_map_array() { + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, + map_type); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(101).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(102).ok()); + EXPECT_TRUE(value_builder->Append("bb").ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(103).ok()); + EXPECT_TRUE(value_builder->Append("cc").ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(104).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int_struct_map_array() { + auto key_builder = std::make_shared(); + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto b_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(b_array_builder))); + auto value_builder = std::make_shared( + struct_type, arrow::default_memory_pool(), std::move(field_builders)); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", struct_type, true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, + map_type); + auto* a_builder = assert_cast(value_builder->field_builder(0)); + auto* b_builder = assert_cast(value_builder->field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(101).ok()); + EXPECT_TRUE(value_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(21).ok()); + EXPECT_TRUE(b_builder->Append("ma").ok()); + EXPECT_TRUE(key_builder->Append(102).ok()); + EXPECT_TRUE(value_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(22).ok()); + EXPECT_TRUE(b_builder->AppendNull().ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(103).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(104).ok()); + EXPECT_TRUE(value_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(24).ok()); + EXPECT_TRUE(b_builder->Append("me").ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int_list_map_array() { + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + auto list_builder = std::make_shared(arrow::default_memory_pool(), + value_builder, list_type); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, list_builder, + map_type); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(201).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(1).ok()); + EXPECT_TRUE(value_builder->Append(2).ok()); + EXPECT_TRUE(key_builder->Append(202).ok()); + EXPECT_TRUE(list_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(203).ok()); + EXPECT_TRUE(list_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(204).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(value_builder->Append(3).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(205).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(value_builder->Append(4).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_map_list_array() { + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + auto map_builder = std::make_shared( + arrow::default_memory_pool(), key_builder, value_builder, map_type); + arrow::ListBuilder builder(arrow::default_memory_pool(), map_builder, + arrow::list(arrow::field("element", map_type, true))); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(1).ok()); + EXPECT_TRUE(value_builder->Append("a").ok()); + EXPECT_TRUE(key_builder->Append(2).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + EXPECT_TRUE(map_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(map_builder->AppendNull().ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(3).ok()); + EXPECT_TRUE(value_builder->Append("c").ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(4).ok()); + EXPECT_TRUE(value_builder->Append("d").ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int_map_map_array() { + auto key_builder = std::make_shared(); + auto nested_key_builder = std::make_shared(); + auto nested_value_builder = std::make_shared(); + auto nested_map_type = + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + auto nested_map_builder = std::make_shared( + arrow::default_memory_pool(), nested_key_builder, nested_value_builder, + nested_map_type); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", nested_map_type, true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, nested_map_builder, + map_type); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(10).ok()); + EXPECT_TRUE(nested_map_builder->Append().ok()); + EXPECT_TRUE(nested_key_builder->Append(101).ok()); + EXPECT_TRUE(nested_value_builder->Append("aa").ok()); + EXPECT_TRUE(key_builder->Append(20).ok()); + EXPECT_TRUE(nested_map_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(30).ok()); + EXPECT_TRUE(nested_map_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(40).ok()); + EXPECT_TRUE(nested_map_builder->Append().ok()); + EXPECT_TRUE(nested_key_builder->Append(401).ok()); + EXPECT_TRUE(nested_value_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + return finish_array(&builder); + } + + std::shared_ptr build_deep_list_struct_map_list_array() { + auto element_builder = std::make_shared(); + auto list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + auto list_builder = std::make_shared(arrow::default_memory_pool(), + element_builder, list_type); + auto key_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true)); + auto map_builder = std::make_shared(arrow::default_memory_pool(), + key_builder, list_builder, map_type); + auto struct_type = arrow::struct_({arrow::field("kv", map_type, true)}); + std::vector> struct_field_builders; + struct_field_builders.push_back(map_builder); + auto struct_builder = std::make_shared( + struct_type, arrow::default_memory_pool(), std::move(struct_field_builders)); + arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder, + arrow::list(arrow::field("element", struct_type, true))); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(1).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(element_builder->Append(10).ok()); + EXPECT_TRUE(element_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(2).ok()); + EXPECT_TRUE(list_builder->AppendEmptyValue().ok()); + EXPECT_TRUE(struct_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(map_builder->AppendNull().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(map_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(map_builder->Append().ok()); + EXPECT_TRUE(key_builder->Append(3).ok()); + EXPECT_TRUE(list_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(4).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(element_builder->Append(40).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_deep_map_list_map_array() { + auto nested_key_builder = std::make_shared(); + auto nested_value_builder = std::make_shared(); + auto nested_map_type = + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + auto nested_map_builder = std::make_shared( + arrow::default_memory_pool(), nested_key_builder, nested_value_builder, + nested_map_type); + auto list_type = arrow::list(arrow::field("element", nested_map_type, true)); + auto list_builder = std::make_shared(arrow::default_memory_pool(), + nested_map_builder, list_type); + auto key_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, list_builder, + map_type); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(10).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(nested_map_builder->Append().ok()); + EXPECT_TRUE(nested_key_builder->Append(1).ok()); + EXPECT_TRUE(nested_value_builder->Append("a").ok()); + EXPECT_TRUE(nested_key_builder->Append(2).ok()); + EXPECT_TRUE(nested_value_builder->AppendNull().ok()); + EXPECT_TRUE(nested_map_builder->AppendEmptyValue().ok()); + EXPECT_TRUE(nested_map_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(20).ok()); + EXPECT_TRUE(list_builder->AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(30).ok()); + EXPECT_TRUE(list_builder->AppendNull().ok()); + EXPECT_TRUE(key_builder->Append(40).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(nested_map_builder->Append().ok()); + EXPECT_TRUE(nested_key_builder->Append(3).ok()); + EXPECT_TRUE(nested_value_builder->Append("c").ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(50).ok()); + EXPECT_TRUE(list_builder->Append().ok()); + EXPECT_TRUE(nested_map_builder->AppendNull().ok()); + EXPECT_TRUE(nested_map_builder->Append().ok()); + EXPECT_TRUE(nested_key_builder->Append(4).ok()); + EXPECT_TRUE(nested_value_builder->Append("d").ok()); + return finish_array(&builder); + } + + void add_field(const std::shared_ptr& field, std::shared_ptr array, + std::function validator) { + _arrow_fields.push_back(field); + _arrays.push_back(std::move(array)); + _expected_by_field.push_back(std::move(validator)); + } + + void write_parquet_file() { + add_field(arrow::field("int32_col", arrow::int32(), false), + build_required_array({10, 20, 30, 40, 50}), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(0), 10); + EXPECT_EQ(values.get_element(4), 50); + }); + add_field(arrow::field("string_col", arrow::utf8(), false), + build_string_array({"alpha", "beta", "gamma", "delta", "epsilon"}), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type_descriptor.is_string_like); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_data_at(0).to_string(), "alpha"); + EXPECT_EQ(values.get_data_at(4).to_string(), "epsilon"); + }); + add_field(arrow::field("nullable_int_col", arrow::int32(), true), + build_nullable_int32_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + const auto& nested_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_TRUE(nullable_column.is_null_at(3)); + EXPECT_EQ(nested_column.get_element(0), 1); + EXPECT_EQ(nested_column.get_element(2), 3); + }); + add_field(arrow::field("all_null_int_col", arrow::int32(), true), + build_all_null_int32_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + for (size_t row = 0; row < ROW_COUNT; ++row) { + EXPECT_TRUE(nullable_column.is_null_at(row)); + } + }); + add_field(arrow::field("struct_col", + arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("b", arrow::utf8(), false), + }), + false), + build_required_struct_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_STRUCT); + const auto& struct_column = assert_cast(column); + ASSERT_EQ(struct_column.get_columns().size(), 2); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + const auto& b_values = + get_nullable_nested_column(struct_column.get_column(1)); + EXPECT_EQ(a_values.get_element(0), 101); + EXPECT_EQ(a_values.get_element(4), 105); + EXPECT_EQ(b_values.get_data_at(1).to_string(), "sb"); + EXPECT_EQ(b_values.get_data_at(4).to_string(), "se"); + }); + add_field(arrow::field("nullable_struct_col", + arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("b", arrow::utf8(), true), + }), + true), + build_nullable_struct_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_TRUE(nullable_column.is_null_at(4)); + + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 2); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + const auto& b_values = + assert_cast(struct_column.get_column(1)); + const auto& b_nested = + assert_cast(b_values.get_nested_column()); + EXPECT_EQ(a_values.get_element(0), 201); + EXPECT_EQ(a_values.get_element(2), 203); + EXPECT_EQ(a_values.get_element(3), 204); + EXPECT_FALSE(b_values.is_null_at(0)); + EXPECT_TRUE(b_values.is_null_at(2)); + EXPECT_FALSE(b_values.is_null_at(3)); + EXPECT_EQ(b_nested.get_data_at(0).to_string(), "nsa"); + EXPECT_EQ(b_nested.get_data_at(3).to_string(), "nsd"); + }); + add_field(arrow::field("nullable_struct_decimal_col", + arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("d", arrow::decimal128(38, 6), true), + }), + true), + build_nullable_struct_with_decimal_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_TRUE(nullable_column.is_null_at(4)); + + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 2); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + const auto& d_values = + assert_cast(struct_column.get_column(1)); + const auto& d_nested = + assert_cast(d_values.get_nested_column()); + EXPECT_EQ(a_values.get_element(0), 301); + EXPECT_EQ(a_values.get_element(2), 303); + EXPECT_EQ(a_values.get_element(3), 304); + EXPECT_FALSE(d_values.is_null_at(0)); + EXPECT_TRUE(d_values.is_null_at(2)); + EXPECT_FALSE(d_values.is_null_at(3)); + EXPECT_EQ(d_nested.get_element(0), Decimal128V3(123456789)); + EXPECT_EQ(d_nested.get_element(3), Decimal128V3(-987654321)); + }); + auto struct_list_type = arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("xs", arrow::list(arrow::field("element", arrow::int32(), true)), + true), + }); + add_field(arrow::field("nullable_struct_list_col", struct_list_type, true), + build_nullable_struct_with_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 2); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + EXPECT_EQ(a_values.get_element(0), 301); + EXPECT_EQ(a_values.get_element(2), 303); + EXPECT_EQ(a_values.get_element(3), 304); + EXPECT_EQ(a_values.get_element(4), 305); + + const auto& xs_nullable = + assert_cast(struct_column.get_column(1)); + ASSERT_EQ(xs_nullable.size(), ROW_COUNT); + EXPECT_FALSE(xs_nullable.is_null_at(0)); + EXPECT_FALSE(xs_nullable.is_null_at(2)); + EXPECT_TRUE(xs_nullable.is_null_at(3)); + EXPECT_FALSE(xs_nullable.is_null_at(4)); + const auto& xs_array = + assert_cast(xs_nullable.get_nested_column()); + const auto& offsets = xs_array.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 2); + EXPECT_EQ(offsets[4], 4); + const auto& elements = + assert_cast(xs_array.get_data()); + ASSERT_EQ(elements.size(), 4); + EXPECT_FALSE(elements.is_null_at(0)); + EXPECT_FALSE(elements.is_null_at(1)); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_FALSE(elements.is_null_at(3)); + const auto& values = + assert_cast(elements.get_nested_column()); + EXPECT_EQ(values.get_element(0), 1); + EXPECT_EQ(values.get_element(1), 2); + EXPECT_EQ(values.get_element(3), 5); + }); + auto struct_map_type = arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("kv", + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)), + true), + }); + add_field(arrow::field("nullable_struct_map_col", struct_map_type, true), + build_nullable_struct_with_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 2); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + EXPECT_EQ(a_values.get_element(0), 401); + EXPECT_EQ(a_values.get_element(2), 403); + EXPECT_EQ(a_values.get_element(3), 404); + EXPECT_EQ(a_values.get_element(4), 405); + + const auto& kv_nullable = + assert_cast(struct_column.get_column(1)); + ASSERT_EQ(kv_nullable.size(), ROW_COUNT); + EXPECT_FALSE(kv_nullable.is_null_at(0)); + EXPECT_FALSE(kv_nullable.is_null_at(2)); + EXPECT_TRUE(kv_nullable.is_null_at(3)); + EXPECT_FALSE(kv_nullable.is_null_at(4)); + const auto& kv_map = + assert_cast(kv_nullable.get_nested_column()); + const auto& offsets = kv_map.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 2); + EXPECT_EQ(offsets[4], 3); + const auto& keys = get_nullable_nested_column(kv_map.get_keys()); + const auto& values = assert_cast(kv_map.get_values()); + const auto& value_data = + assert_cast(values.get_nested_column()); + ASSERT_EQ(keys.size(), 3); + ASSERT_EQ(values.size(), 3); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 5); + EXPECT_EQ(value_data.get_data_at(0).to_string(), "one"); + EXPECT_TRUE(values.is_null_at(1)); + EXPECT_EQ(value_data.get_data_at(2).to_string(), "five"); + }); + auto nested_struct_list_type = arrow::struct_({ + arrow::field("nested", + arrow::struct_({ + arrow::field("xs", + arrow::list(arrow::field("element", + arrow::int32(), true)), + true), + }), + true), + }); + add_field(arrow::field("nullable_struct_nested_struct_list_col", nested_struct_list_type, + true), + build_nullable_struct_with_nested_struct_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& nested_nullable = + assert_cast(struct_column.get_column(0)); + EXPECT_FALSE(nested_nullable.is_null_at(0)); + EXPECT_TRUE(nested_nullable.is_null_at(2)); + EXPECT_FALSE(nested_nullable.is_null_at(3)); + EXPECT_FALSE(nested_nullable.is_null_at(4)); + }); + add_field(arrow::field("list_int_col", + arrow::list(arrow::field("element", arrow::int32(), false)), false), + build_required_int_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_ARRAY); + const auto* array_type = + assert_cast(remove_nullable(schema.type).get()); + EXPECT_EQ( + remove_nullable(array_type->get_nested_type())->get_primitive_type(), + TYPE_INT); + const auto& array_column = assert_cast(column); + ASSERT_EQ(array_column.size(), ROW_COUNT); + const auto array_size_at = [&array_column](size_t row_idx) { + return array_column.get_offsets()[row_idx] - + (row_idx == 0 ? 0 : array_column.get_offsets()[row_idx - 1]); + }; + EXPECT_EQ(array_size_at(0), 2); + EXPECT_EQ(array_size_at(1), 1); + EXPECT_EQ(array_size_at(2), 3); + EXPECT_EQ(array_size_at(4), 2); + const auto& values = + get_nullable_nested_column(array_column.get_data()); + ASSERT_EQ(values.size(), 9); + EXPECT_EQ(values.get_element(0), 1); + EXPECT_EQ(values.get_element(5), 6); + EXPECT_EQ(values.get_element(8), 9); + }); + add_field(arrow::field("nullable_list_int_col", + arrow::list(arrow::field("element", arrow::int32(), true)), true), + build_nullable_int_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + const auto& array_column = + assert_cast(nullable_column.get_nested_column()); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 4); + EXPECT_EQ(offsets[4], 5); + const auto& elements = + assert_cast(array_column.get_data()); + const auto& values = + assert_cast(elements.get_nested_column()); + ASSERT_EQ(elements.size(), 5); + EXPECT_EQ(values.get_element(0), 10); + EXPECT_EQ(values.get_element(1), 20); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_EQ(values.get_element(3), 30); + EXPECT_EQ(values.get_element(4), 40); + }); + add_field(arrow::field("required_nullable_list_int_col", + arrow::list(arrow::field("element", arrow::int32(), true)), false), + build_required_nullable_int_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_FALSE(schema.type->is_nullable()); + const auto& array_column = assert_cast(column); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 3); + EXPECT_EQ(offsets[3], 5); + EXPECT_EQ(offsets[4], 5); + const auto& elements = + assert_cast(array_column.get_data()); + ASSERT_EQ(elements.size(), 5); + EXPECT_TRUE(elements.is_null_at(0)); + EXPECT_FALSE(elements.is_null_at(1)); + EXPECT_TRUE(elements.is_null_at(4)); + }); + auto list_struct_type = arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("b", arrow::utf8(), true), + }); + add_field(arrow::field("nullable_list_struct_col", + arrow::list(arrow::field("element", list_struct_type, true)), true), + build_nullable_struct_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& array_column = + assert_cast(nullable_column.get_nested_column()); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 4); + EXPECT_EQ(offsets[4], 5); + + const auto& elements = + assert_cast(array_column.get_data()); + const auto& struct_column = + assert_cast(elements.get_nested_column()); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + const auto& b_values = + assert_cast(struct_column.get_column(1)); + const auto& b_data = + assert_cast(b_values.get_nested_column()); + ASSERT_EQ(elements.size(), 5); + EXPECT_FALSE(elements.is_null_at(0)); + EXPECT_FALSE(elements.is_null_at(1)); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_FALSE(elements.is_null_at(3)); + EXPECT_EQ(a_values.get_element(0), 11); + EXPECT_EQ(a_values.get_element(1), 12); + EXPECT_EQ(a_values.get_element(3), 13); + EXPECT_EQ(a_values.get_element(4), 14); + EXPECT_EQ(b_data.get_data_at(0).to_string(), "la"); + EXPECT_TRUE(b_values.is_null_at(1)); + EXPECT_EQ(b_data.get_data_at(3).to_string(), "ld"); + EXPECT_EQ(b_data.get_data_at(4).to_string(), "le"); + }); + auto nested_list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + add_field(arrow::field("nullable_list_list_int_col", + arrow::list(arrow::field("element", nested_list_type, true)), true), + build_nullable_list_list_int_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& outer_array = + assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), ROW_COUNT); + EXPECT_EQ(outer_offsets[0], 4); + EXPECT_EQ(outer_offsets[1], 4); + EXPECT_EQ(outer_offsets[2], 4); + EXPECT_EQ(outer_offsets[3], 5); + EXPECT_EQ(outer_offsets[4], 7); + + const auto& inner_nullable = + assert_cast(outer_array.get_data()); + ASSERT_EQ(inner_nullable.size(), 7); + EXPECT_FALSE(inner_nullable.is_null_at(0)); + EXPECT_FALSE(inner_nullable.is_null_at(1)); + EXPECT_TRUE(inner_nullable.is_null_at(2)); + EXPECT_FALSE(inner_nullable.is_null_at(3)); + EXPECT_FALSE(inner_nullable.is_null_at(6)); + + const auto& inner_array = + assert_cast(inner_nullable.get_nested_column()); + const auto& inner_offsets = inner_array.get_offsets(); + ASSERT_EQ(inner_offsets.size(), 7); + EXPECT_EQ(inner_offsets[0], 2); + EXPECT_EQ(inner_offsets[1], 2); + EXPECT_EQ(inner_offsets[2], 2); + EXPECT_EQ(inner_offsets[3], 4); + EXPECT_EQ(inner_offsets[4], 5); + EXPECT_EQ(inner_offsets[5], 5); + EXPECT_EQ(inner_offsets[6], 7); + + const auto& elements = + assert_cast(inner_array.get_data()); + const auto& values = + assert_cast(elements.get_nested_column()); + ASSERT_EQ(elements.size(), 7); + EXPECT_EQ(values.get_element(0), 1); + EXPECT_EQ(values.get_element(1), 2); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_EQ(values.get_element(3), 3); + EXPECT_EQ(values.get_element(4), 4); + EXPECT_EQ(values.get_element(5), 5); + EXPECT_TRUE(elements.is_null_at(6)); + }); + add_field(arrow::field( + "map_int_string_col", + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), false)), + false), + build_required_int_string_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_MAP); + const auto* map_type = + assert_cast(remove_nullable(schema.type).get()); + EXPECT_EQ(remove_nullable(map_type->get_key_type())->get_primitive_type(), + TYPE_INT); + EXPECT_EQ(remove_nullable(map_type->get_value_type())->get_primitive_type(), + TYPE_STRING); + const auto& map_column = assert_cast(column); + ASSERT_EQ(map_column.size(), ROW_COUNT); + const auto map_size_at = [&map_column](size_t row_idx) { + return map_column.get_offsets()[row_idx] - + (row_idx == 0 ? 0 : map_column.get_offsets()[row_idx - 1]); + }; + EXPECT_EQ(map_size_at(0), 2); + EXPECT_EQ(map_size_at(1), 1); + EXPECT_EQ(map_size_at(2), 3); + EXPECT_EQ(map_size_at(4), 2); + const auto& keys = + get_nullable_nested_column(map_column.get_keys()); + const auto& values = + get_nullable_nested_column(map_column.get_values()); + ASSERT_EQ(keys.size(), 9); + ASSERT_EQ(values.size(), 9); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(5), 6); + EXPECT_EQ(keys.get_element(8), 9); + EXPECT_EQ(values.get_data_at(0).to_string(), "a"); + EXPECT_EQ(values.get_data_at(5).to_string(), "f"); + EXPECT_EQ(values.get_data_at(8).to_string(), "i"); + }); + add_field( + arrow::field("nullable_map_int_string_col", + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)), + true), + build_nullable_int_string_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& map_column = + assert_cast(nullable_column.get_nested_column()); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 3); + EXPECT_EQ(offsets[4], 4); + const auto& keys = + get_nullable_nested_column(map_column.get_keys()); + const auto& values = + assert_cast(map_column.get_values()); + const auto& value_data = + assert_cast(values.get_nested_column()); + ASSERT_EQ(keys.size(), 4); + EXPECT_EQ(keys.get_element(0), 10); + EXPECT_EQ(keys.get_element(1), 20); + EXPECT_EQ(keys.get_element(3), 40); + EXPECT_EQ(value_data.get_data_at(0).to_string(), "aa"); + EXPECT_TRUE(values.is_null_at(1)); + EXPECT_EQ(value_data.get_data_at(2).to_string(), "cc"); + EXPECT_TRUE(values.is_null_at(3)); + }); + add_field( + arrow::field("required_nullable_map_int_string_col", + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)), + false), + build_required_nullable_string_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_FALSE(schema.type->is_nullable()); + const auto& map_column = assert_cast(column); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 3); + EXPECT_EQ(offsets[3], 3); + EXPECT_EQ(offsets[4], 4); + const auto& values = + assert_cast(map_column.get_values()); + ASSERT_EQ(values.size(), 4); + EXPECT_TRUE(values.is_null_at(0)); + EXPECT_FALSE(values.is_null_at(1)); + EXPECT_TRUE(values.is_null_at(3)); + }); + auto map_struct_type = arrow::struct_({ + arrow::field("a", arrow::int32(), false), + arrow::field("b", arrow::utf8(), true), + }); + add_field(arrow::field( + "nullable_map_int_struct_col", + arrow::map(arrow::int32(), arrow::field("value", map_struct_type, true)), + true), + build_nullable_int_struct_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& map_column = + assert_cast(nullable_column.get_nested_column()); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 3); + EXPECT_EQ(offsets[4], 4); + + const auto& keys = + get_nullable_nested_column(map_column.get_keys()); + const auto& values = + assert_cast(map_column.get_values()); + const auto& struct_column = + assert_cast(values.get_nested_column()); + const auto& a_values = + get_nullable_nested_column(struct_column.get_column(0)); + const auto& b_values = + assert_cast(struct_column.get_column(1)); + const auto& b_data = + assert_cast(b_values.get_nested_column()); + ASSERT_EQ(keys.size(), 4); + ASSERT_EQ(values.size(), 4); + EXPECT_EQ(keys.get_element(0), 101); + EXPECT_EQ(keys.get_element(1), 102); + EXPECT_EQ(keys.get_element(3), 104); + EXPECT_FALSE(values.is_null_at(0)); + EXPECT_FALSE(values.is_null_at(1)); + EXPECT_TRUE(values.is_null_at(2)); + EXPECT_FALSE(values.is_null_at(3)); + EXPECT_EQ(a_values.get_element(0), 21); + EXPECT_EQ(a_values.get_element(1), 22); + EXPECT_EQ(a_values.get_element(3), 24); + EXPECT_EQ(b_data.get_data_at(0).to_string(), "ma"); + EXPECT_TRUE(b_values.is_null_at(1)); + EXPECT_EQ(b_data.get_data_at(3).to_string(), "me"); + }); + auto map_list_type = arrow::list(arrow::field("element", arrow::int32(), true)); + add_field( + arrow::field("nullable_map_int_list_col", + arrow::map(arrow::int32(), arrow::field("value", map_list_type, true)), + true), + build_nullable_int_list_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& map_column = + assert_cast(nullable_column.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), ROW_COUNT); + EXPECT_EQ(map_offsets[0], 2); + EXPECT_EQ(map_offsets[1], 2); + EXPECT_EQ(map_offsets[2], 2); + EXPECT_EQ(map_offsets[3], 4); + EXPECT_EQ(map_offsets[4], 5); + + const auto& keys = + get_nullable_nested_column(map_column.get_keys()); + ASSERT_EQ(keys.size(), 5); + EXPECT_EQ(keys.get_element(0), 201); + EXPECT_EQ(keys.get_element(1), 202); + EXPECT_EQ(keys.get_element(2), 203); + EXPECT_EQ(keys.get_element(3), 204); + EXPECT_EQ(keys.get_element(4), 205); + + const auto& values = + assert_cast(map_column.get_values()); + ASSERT_EQ(values.size(), 5); + EXPECT_FALSE(values.is_null_at(0)); + EXPECT_FALSE(values.is_null_at(1)); + EXPECT_TRUE(values.is_null_at(2)); + EXPECT_FALSE(values.is_null_at(3)); + EXPECT_FALSE(values.is_null_at(4)); + + const auto& list_column = + assert_cast(values.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 5); + EXPECT_EQ(list_offsets[0], 2); + EXPECT_EQ(list_offsets[1], 2); + EXPECT_EQ(list_offsets[2], 2); + EXPECT_EQ(list_offsets[3], 4); + EXPECT_EQ(list_offsets[4], 5); + + const auto& elements = + assert_cast(list_column.get_data()); + const auto& element_values = + assert_cast(elements.get_nested_column()); + ASSERT_EQ(elements.size(), 5); + EXPECT_EQ(element_values.get_element(0), 1); + EXPECT_EQ(element_values.get_element(1), 2); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_EQ(element_values.get_element(3), 3); + EXPECT_EQ(element_values.get_element(4), 4); + }); + auto list_map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + add_field(arrow::field("nullable_list_map_int_string_col", + arrow::list(arrow::field("element", list_map_type, true)), true), + build_nullable_map_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& outer_array = + assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), ROW_COUNT); + EXPECT_EQ(outer_offsets[0], 2); + EXPECT_EQ(outer_offsets[1], 2); + EXPECT_EQ(outer_offsets[2], 2); + EXPECT_EQ(outer_offsets[3], 4); + EXPECT_EQ(outer_offsets[4], 5); + + const auto& map_values = + assert_cast(outer_array.get_data()); + ASSERT_EQ(map_values.size(), 5); + EXPECT_FALSE(map_values.is_null_at(0)); + EXPECT_FALSE(map_values.is_null_at(1)); + EXPECT_TRUE(map_values.is_null_at(2)); + EXPECT_FALSE(map_values.is_null_at(3)); + EXPECT_FALSE(map_values.is_null_at(4)); + + const auto& map_column = + assert_cast(map_values.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), 5); + EXPECT_EQ(map_offsets[0], 2); + EXPECT_EQ(map_offsets[1], 2); + EXPECT_EQ(map_offsets[2], 2); + EXPECT_EQ(map_offsets[3], 3); + EXPECT_EQ(map_offsets[4], 4); + const auto& keys = + get_nullable_nested_column(map_column.get_keys()); + const auto& values = + assert_cast(map_column.get_values()); + const auto& value_data = + assert_cast(values.get_nested_column()); + ASSERT_EQ(keys.size(), 4); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 3); + EXPECT_EQ(keys.get_element(3), 4); + EXPECT_EQ(value_data.get_data_at(0).to_string(), "a"); + EXPECT_TRUE(values.is_null_at(1)); + EXPECT_EQ(value_data.get_data_at(2).to_string(), "c"); + EXPECT_EQ(value_data.get_data_at(3).to_string(), "d"); + }); + auto nested_map_type = + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + add_field(arrow::field( + "nullable_map_int_map_int_string_col", + arrow::map(arrow::int32(), arrow::field("value", nested_map_type, true)), + true), + build_nullable_int_map_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& outer_map = + assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_map.get_offsets(); + ASSERT_EQ(outer_offsets.size(), ROW_COUNT); + EXPECT_EQ(outer_offsets[0], 2); + EXPECT_EQ(outer_offsets[1], 2); + EXPECT_EQ(outer_offsets[2], 2); + EXPECT_EQ(outer_offsets[3], 4); + EXPECT_EQ(outer_offsets[4], 4); + + const auto& outer_keys = + get_nullable_nested_column(outer_map.get_keys()); + ASSERT_EQ(outer_keys.size(), 4); + EXPECT_EQ(outer_keys.get_element(0), 10); + EXPECT_EQ(outer_keys.get_element(1), 20); + EXPECT_EQ(outer_keys.get_element(2), 30); + EXPECT_EQ(outer_keys.get_element(3), 40); + + const auto& inner_values = + assert_cast(outer_map.get_values()); + ASSERT_EQ(inner_values.size(), 4); + EXPECT_FALSE(inner_values.is_null_at(0)); + EXPECT_FALSE(inner_values.is_null_at(1)); + EXPECT_TRUE(inner_values.is_null_at(2)); + EXPECT_FALSE(inner_values.is_null_at(3)); + + const auto& inner_map = + assert_cast(inner_values.get_nested_column()); + const auto& inner_offsets = inner_map.get_offsets(); + ASSERT_EQ(inner_offsets.size(), 4); + EXPECT_EQ(inner_offsets[0], 1); + EXPECT_EQ(inner_offsets[1], 1); + EXPECT_EQ(inner_offsets[2], 1); + EXPECT_EQ(inner_offsets[3], 2); + const auto& inner_keys = + get_nullable_nested_column(inner_map.get_keys()); + const auto& inner_strings = + assert_cast(inner_map.get_values()); + const auto& inner_string_data = + assert_cast(inner_strings.get_nested_column()); + ASSERT_EQ(inner_keys.size(), 2); + EXPECT_EQ(inner_keys.get_element(0), 101); + EXPECT_EQ(inner_keys.get_element(1), 401); + EXPECT_EQ(inner_string_data.get_data_at(0).to_string(), "aa"); + EXPECT_TRUE(inner_strings.is_null_at(1)); + }); + auto deep_list_value_type = arrow::list(arrow::field("element", arrow::int32(), true)); + auto deep_list_map_type = + arrow::map(arrow::int32(), arrow::field("value", deep_list_value_type, true)); + auto deep_list_struct_type = arrow::struct_({arrow::field("kv", deep_list_map_type, true)}); + add_field(arrow::field("nullable_list_struct_map_list_col", + arrow::list(arrow::field("element", deep_list_struct_type, true)), + true), + build_deep_list_struct_map_list_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& outer_array = + assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), ROW_COUNT); + EXPECT_EQ(outer_offsets[0], 2); + EXPECT_EQ(outer_offsets[1], 2); + EXPECT_EQ(outer_offsets[2], 2); + EXPECT_EQ(outer_offsets[3], 4); + EXPECT_EQ(outer_offsets[4], 5); + + const auto& struct_values = + assert_cast(outer_array.get_data()); + ASSERT_EQ(struct_values.size(), 5); + EXPECT_FALSE(struct_values.is_null_at(0)); + EXPECT_TRUE(struct_values.is_null_at(1)); + EXPECT_FALSE(struct_values.is_null_at(2)); + EXPECT_FALSE(struct_values.is_null_at(3)); + EXPECT_FALSE(struct_values.is_null_at(4)); + + const auto& struct_column = + assert_cast(struct_values.get_nested_column()); + const auto& map_values = + assert_cast(struct_column.get_column(0)); + ASSERT_EQ(map_values.size(), 5); + EXPECT_FALSE(map_values.is_null_at(0)); + EXPECT_TRUE(map_values.is_null_at(1)); + EXPECT_TRUE(map_values.is_null_at(2)); + EXPECT_FALSE(map_values.is_null_at(3)); + EXPECT_FALSE(map_values.is_null_at(4)); + + const auto& map_column = + assert_cast(map_values.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), 5); + EXPECT_EQ(map_offsets[0], 2); + EXPECT_EQ(map_offsets[1], 2); + EXPECT_EQ(map_offsets[2], 2); + EXPECT_EQ(map_offsets[3], 2); + EXPECT_EQ(map_offsets[4], 4); + const auto& keys = + get_nullable_nested_column(map_column.get_keys()); + ASSERT_EQ(keys.size(), 4); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 3); + EXPECT_EQ(keys.get_element(3), 4); + + const auto& lists = + assert_cast(map_column.get_values()); + ASSERT_EQ(lists.size(), 4); + EXPECT_FALSE(lists.is_null_at(0)); + EXPECT_FALSE(lists.is_null_at(1)); + EXPECT_TRUE(lists.is_null_at(2)); + EXPECT_FALSE(lists.is_null_at(3)); + const auto& list_column = + assert_cast(lists.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 4); + EXPECT_EQ(list_offsets[0], 2); + EXPECT_EQ(list_offsets[1], 2); + EXPECT_EQ(list_offsets[2], 2); + EXPECT_EQ(list_offsets[3], 3); + const auto& elements = + assert_cast(list_column.get_data()); + const auto& element_values = + assert_cast(elements.get_nested_column()); + ASSERT_EQ(elements.size(), 3); + EXPECT_EQ(element_values.get_element(0), 10); + EXPECT_TRUE(elements.is_null_at(1)); + EXPECT_EQ(element_values.get_element(2), 40); + }); + auto deep_map_nested_map_type = + arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + auto deep_map_list_type = + arrow::list(arrow::field("element", deep_map_nested_map_type, true)); + add_field( + arrow::field( + "nullable_map_int_list_map_int_string_col", + arrow::map(arrow::int32(), arrow::field("value", deep_map_list_type, true)), + true), + build_deep_map_list_map_array(), + [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& outer_map = + assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_map.get_offsets(); + ASSERT_EQ(outer_offsets.size(), ROW_COUNT); + EXPECT_EQ(outer_offsets[0], 2); + EXPECT_EQ(outer_offsets[1], 2); + EXPECT_EQ(outer_offsets[2], 2); + EXPECT_EQ(outer_offsets[3], 4); + EXPECT_EQ(outer_offsets[4], 5); + const auto& outer_keys = + get_nullable_nested_column(outer_map.get_keys()); + ASSERT_EQ(outer_keys.size(), 5); + EXPECT_EQ(outer_keys.get_element(0), 10); + EXPECT_EQ(outer_keys.get_element(1), 20); + EXPECT_EQ(outer_keys.get_element(2), 30); + EXPECT_EQ(outer_keys.get_element(3), 40); + EXPECT_EQ(outer_keys.get_element(4), 50); + + const auto& lists = assert_cast(outer_map.get_values()); + ASSERT_EQ(lists.size(), 5); + EXPECT_FALSE(lists.is_null_at(0)); + EXPECT_FALSE(lists.is_null_at(1)); + EXPECT_TRUE(lists.is_null_at(2)); + EXPECT_FALSE(lists.is_null_at(3)); + EXPECT_FALSE(lists.is_null_at(4)); + const auto& list_column = + assert_cast(lists.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 5); + EXPECT_EQ(list_offsets[0], 3); + EXPECT_EQ(list_offsets[1], 3); + EXPECT_EQ(list_offsets[2], 3); + EXPECT_EQ(list_offsets[3], 4); + EXPECT_EQ(list_offsets[4], 6); + + const auto& inner_maps = + assert_cast(list_column.get_data()); + ASSERT_EQ(inner_maps.size(), 6); + EXPECT_FALSE(inner_maps.is_null_at(0)); + EXPECT_FALSE(inner_maps.is_null_at(1)); + EXPECT_TRUE(inner_maps.is_null_at(2)); + EXPECT_FALSE(inner_maps.is_null_at(3)); + EXPECT_TRUE(inner_maps.is_null_at(4)); + EXPECT_FALSE(inner_maps.is_null_at(5)); + const auto& inner_map_column = + assert_cast(inner_maps.get_nested_column()); + const auto& inner_offsets = inner_map_column.get_offsets(); + ASSERT_EQ(inner_offsets.size(), 6); + EXPECT_EQ(inner_offsets[0], 2); + EXPECT_EQ(inner_offsets[1], 2); + EXPECT_EQ(inner_offsets[2], 2); + EXPECT_EQ(inner_offsets[3], 3); + EXPECT_EQ(inner_offsets[4], 3); + EXPECT_EQ(inner_offsets[5], 4); + const auto& inner_keys = + get_nullable_nested_column(inner_map_column.get_keys()); + ASSERT_EQ(inner_keys.size(), 4); + EXPECT_EQ(inner_keys.get_element(0), 1); + EXPECT_EQ(inner_keys.get_element(1), 2); + EXPECT_EQ(inner_keys.get_element(2), 3); + EXPECT_EQ(inner_keys.get_element(3), 4); + const auto& strings = + assert_cast(inner_map_column.get_values()); + const auto& string_data = + assert_cast(strings.get_nested_column()); + ASSERT_EQ(strings.size(), 4); + EXPECT_EQ(string_data.get_data_at(0).to_string(), "a"); + EXPECT_TRUE(strings.is_null_at(1)); + EXPECT_EQ(string_data.get_data_at(2).to_string(), "c"); + EXPECT_EQ(string_data.get_data_at(3).to_string(), "d"); + }); + + auto schema = arrow::schema(_arrow_fields); + auto table = arrow::Table::Make(schema, _arrays); + + auto file_result = arrow::io::FileOutputStream::Open(_file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ROW_COUNT, builder.build())); + } + + std::unique_ptr create_reader(size_t field_idx) const { + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(*_fields[field_idx], &reader); + EXPECT_TRUE(st.ok()) << st; + return reader; + } + + std::unique_ptr create_projected_child_reader(size_t field_idx, + size_t child_idx) const { + const auto& struct_schema = *_fields[field_idx]; + EXPECT_LT(child_idx, struct_schema.children.size()); + + format::LocalColumnIndex projection; + projection.index = struct_schema.local_id; + projection.project_all_children = false; + format::LocalColumnIndex child_projection; + child_projection.index = struct_schema.children[child_idx]->local_id; + projection.children.push_back(std::move(child_projection)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(struct_schema, &projection, &reader); + EXPECT_TRUE(st.ok()) << st; + return reader; + } + + std::unique_ptr create_projected_grandchild_reader( + size_t field_idx, size_t child_idx, size_t grandchild_idx) const { + const auto& struct_schema = *_fields[field_idx]; + EXPECT_LT(child_idx, struct_schema.children.size()); + const auto& child_schema = *struct_schema.children[child_idx]; + EXPECT_LT(grandchild_idx, child_schema.children.size()); + + format::LocalColumnIndex projection; + projection.index = struct_schema.local_id; + projection.project_all_children = false; + format::LocalColumnIndex child_projection; + child_projection.index = child_schema.local_id; + child_projection.project_all_children = false; + format::LocalColumnIndex grandchild_projection; + grandchild_projection.index = child_schema.children[grandchild_idx]->local_id; + child_projection.children.push_back(std::move(grandchild_projection)); + projection.children.push_back(std::move(child_projection)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(struct_schema, &projection, &reader); + EXPECT_TRUE(st.ok()) << st; + return reader; + } + + void read_and_validate(size_t field_idx) const { + auto reader = create_reader(field_idx); + ASSERT_NE(reader, nullptr); + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + ASSERT_EQ(column->size(), ROW_COUNT); + _expected_by_field[field_idx](*_fields[field_idx], *column); + } + + size_t find_field_idx(const std::string& name) const { + for (size_t field_idx = 0; field_idx < _fields.size(); ++field_idx) { + if (_fields[field_idx]->name == name) { + return field_idx; + } + } + ADD_FAILURE() << "Cannot find parquet test field " << name; + return _fields.size(); + } + + std::filesystem::path _test_dir; + std::string _file_path; + std::unique_ptr<::parquet::ParquetFileReader> _file_reader; + std::shared_ptr<::parquet::RowGroupReader> _row_group; + std::vector> _fields; + std::vector> _arrow_fields; + std::vector> _arrays; + std::vector> _expected_by_field; +}; + +TEST(ParquetColumnReaderBaseTest, SelectionVectorRangesAndValidation) { + SelectionVector identity; + ASSERT_TRUE(identity.verify(4, 5).ok()); + auto ranges = selection_to_ranges(identity, 4); + ASSERT_EQ(ranges.size(), 1); + EXPECT_EQ(ranges[0].start, 0); + EXPECT_EQ(ranges[0].length, 4); + + std::array selected = {0, 2, 3, 6, 6}; + SelectionVector external(selected.data(), 4); + auto status = external.verify(3, 7); + ASSERT_TRUE(status.ok()) << status; + ranges = selection_to_ranges(external, 3); + ASSERT_EQ(ranges.size(), 2); + EXPECT_EQ(ranges[0].start, 0); + EXPECT_EQ(ranges[0].length, 1); + EXPECT_EQ(ranges[1].start, 2); + EXPECT_EQ(ranges[1].length, 2); + + EXPECT_FALSE(external.verify(8, 7).ok()); + EXPECT_FALSE(external.verify(5, 7).ok()); + EXPECT_FALSE(external.verify(4, 6).ok()); + + std::array duplicate = {0, 2, 2}; + SelectionVector non_strict(duplicate.data(), duplicate.size()); + EXPECT_FALSE(non_strict.verify(3, 5).ok()); + EXPECT_FALSE(identity.verify(1, -1).ok()); +} + +TEST(ParquetColumnReaderBaseTest, DefaultSelectUsesSkipReadRangesAndSkipNestedUsesBuild) { + DefaultSelectReader reader; + std::array selected = {1, 3, 4}; + SelectionVector selection(selected.data(), selected.size()); + auto column = ColumnInt32::create(); + MutableColumnPtr mutable_column = std::move(column); + auto status = reader.select(selection, selected.size(), 6, mutable_column); + ASSERT_TRUE(status.ok()) << status; + + const auto& values = assert_cast(*mutable_column); + ASSERT_EQ(values.size(), 3); + EXPECT_EQ(values.get_element(0), 1); + EXPECT_EQ(values.get_element(1), 3); + EXPECT_EQ(values.get_element(2), 4); + EXPECT_EQ(reader.skip_ranges(), std::vector({1, 1, 1})); + EXPECT_EQ(reader.read_ranges(), std::vector({1, 2})); + + BaseUnsupportedReader unsupported_reader; + auto skip_status = unsupported_reader.skip(1); + EXPECT_FALSE(skip_status.ok()); + EXPECT_NE(skip_status.to_string().find("skip is not implemented"), std::string::npos); + EXPECT_FALSE(unsupported_reader.load_nested_batch(1).ok()); + int64_t values_read = 0; + EXPECT_FALSE(unsupported_reader.build_nested_column(1, mutable_column, &values_read).ok()); + + NestedSkipReader nested_reader; + auto nested_status = nested_reader.skip_nested_column(3); + ASSERT_TRUE(nested_status.ok()) << nested_status; +} + +TEST_F(ParquetColumnReaderTest, ScalarReadCoversRequiredNullableAllNullAndMultipleBatches) { + read_and_validate(find_field_idx("int32_col")); + read_and_validate(find_field_idx("string_col")); + read_and_validate(find_field_idx("nullable_int_col")); + read_and_validate(find_field_idx("all_null_int_col")); + + auto reader = create_reader(find_field_idx("int32_col")); + auto column = reader->type()->create_column(); + int64_t rows_read = 0; + ASSERT_TRUE(reader->read(2, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 2); + ASSERT_TRUE(reader->read(3, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 3); + const auto& values = assert_cast(*column); + ASSERT_EQ(values.size(), ROW_COUNT); + EXPECT_EQ(values.get_element(0), 10); + EXPECT_EQ(values.get_element(1), 20); + EXPECT_EQ(values.get_element(2), 30); + EXPECT_EQ(values.get_element(4), 50); +} + +TEST_F(ParquetColumnReaderTest, ScalarSkipCoversZeroSomeAllAndNulls) { + auto reader = create_reader(find_field_idx("int32_col")); + ASSERT_TRUE(reader->skip(0).ok()); + auto column = reader->type()->create_column(); + int64_t rows_read = 0; + ASSERT_TRUE(reader->read(1, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 1); + const auto& first_value = assert_cast(*column); + EXPECT_EQ(first_value.get_element(0), 10); + + reader = create_reader(find_field_idx("int32_col")); + ASSERT_TRUE(reader->skip(2).ok()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->read(2, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 2); + const auto& skipped_values = assert_cast(*column); + EXPECT_EQ(skipped_values.get_element(0), 30); + EXPECT_EQ(skipped_values.get_element(1), 40); + + reader = create_reader(find_field_idx("int32_col")); + ASSERT_TRUE(reader->skip(ROW_COUNT).ok()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->read(1, column, &rows_read).ok()); + EXPECT_EQ(rows_read, 0); + EXPECT_EQ(column->size(), 0); + + reader = create_reader(find_field_idx("nullable_int_col")); + ASSERT_TRUE(reader->skip(1).ok()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->read(2, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 2); + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 2); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); +} + +TEST_F(ParquetColumnReaderTest, ScalarSelectCoversAllDisjointSingleZeroThenReadAndNulls) { + auto reader = create_reader(find_field_idx("int32_col")); + SelectionVector all_selected(ROW_COUNT); + auto column = reader->type()->create_column(); + ASSERT_TRUE(reader->select(all_selected, ROW_COUNT, ROW_COUNT, column).ok()); + const auto& all_values = assert_cast(*column); + ASSERT_EQ(all_values.size(), ROW_COUNT); + EXPECT_EQ(all_values.get_element(0), 10); + EXPECT_EQ(all_values.get_element(4), 50); + + reader = create_reader(find_field_idx("int32_col")); + std::array disjoint = {0, 2, 4}; + SelectionVector disjoint_selection(disjoint.data(), disjoint.size()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->select(disjoint_selection, disjoint.size(), ROW_COUNT, column).ok()); + const auto& disjoint_values = assert_cast(*column); + ASSERT_EQ(disjoint_values.size(), 3); + EXPECT_EQ(disjoint_values.get_element(0), 10); + EXPECT_EQ(disjoint_values.get_element(1), 30); + EXPECT_EQ(disjoint_values.get_element(2), 50); + + reader = create_reader(find_field_idx("int32_col")); + std::array single = {2}; + SelectionVector single_selection(single.data(), single.size()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->select(single_selection, single.size(), ROW_COUNT, column).ok()); + const auto& single_value = assert_cast(*column); + ASSERT_EQ(single_value.size(), 1); + EXPECT_EQ(single_value.get_element(0), 30); + + reader = create_reader(find_field_idx("int32_col")); + std::array first_last = {0, 4}; + SelectionVector first_last_selection(first_last.data(), first_last.size()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->select(first_last_selection, first_last.size(), ROW_COUNT, column).ok()); + const auto& first_last_values = assert_cast(*column); + ASSERT_EQ(first_last_values.size(), 2); + EXPECT_EQ(first_last_values.get_element(0), 10); + EXPECT_EQ(first_last_values.get_element(1), 50); + + reader = create_reader(find_field_idx("int32_col")); + SelectionVector empty_selection; + column = reader->type()->create_column(); + ASSERT_TRUE(reader->select(empty_selection, 0, 2, column).ok()); + ASSERT_EQ(column->size(), 0); + int64_t rows_read = 0; + ASSERT_TRUE(reader->read(1, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 1); + const auto& after_empty_select = assert_cast(*column); + ASSERT_EQ(after_empty_select.size(), 1); + EXPECT_EQ(after_empty_select.get_element(0), 30); + + reader = create_reader(find_field_idx("nullable_int_col")); + std::array nullable_rows = {0, 1, 2}; + SelectionVector nullable_selection(nullable_rows.data(), nullable_rows.size()); + column = reader->type()->create_column(); + ASSERT_TRUE(reader->select(nullable_selection, nullable_rows.size(), ROW_COUNT, column).ok()); + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); +} + +TEST_F(ParquetColumnReaderTest, FactoryRejectsInvalidScalarInputsAndNestedScalarProjection) { + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + + const auto& int_schema = *_fields[find_field_idx("int32_col")]; + ParquetColumnSchema invalid_leaf; + invalid_leaf.kind = ParquetColumnSchemaKind::PRIMITIVE; + invalid_leaf.name = "invalid_leaf"; + invalid_leaf.type = int_schema.type; + invalid_leaf.type_descriptor = int_schema.type_descriptor; + invalid_leaf.descriptor = int_schema.descriptor; + invalid_leaf.leaf_column_id = _file_reader->metadata()->num_columns(); + auto status = factory.create(invalid_leaf, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Invalid parquet leaf column id"), std::string::npos); + + ParquetColumnSchema null_descriptor; + null_descriptor.kind = ParquetColumnSchemaKind::PRIMITIVE; + null_descriptor.name = "null_descriptor"; + null_descriptor.type = int_schema.type; + null_descriptor.type_descriptor = int_schema.type_descriptor; + null_descriptor.leaf_column_id = int_schema.leaf_column_id; + status = factory.create(null_descriptor, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("descriptor is null"), std::string::npos); + + const auto& list_element_schema = + *_fields[find_field_idx("nullable_list_int_col")]->children[0]; + status = factory.create(list_element_schema, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("flat primitive columns"), std::string::npos); + + const auto& list_schema = *_fields[find_field_idx("nullable_list_int_col")]; + format::LocalColumnIndex projection = + format::LocalColumnIndex::partial_local(list_schema.local_id); + format::LocalColumnIndex element_projection = + format::LocalColumnIndex::partial_local(list_element_schema.local_id); + projection.children.push_back(std::move(element_projection)); + status = factory.create(list_schema, &projection, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("scalar projection is invalid"), std::string::npos); +} + +TEST_F(ParquetColumnReaderTest, FactoryRejectsInvalidComplexProjections) { + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + + const auto& struct_schema = *_fields[find_field_idx("struct_col")]; + format::LocalColumnIndex struct_empty = + format::LocalColumnIndex::partial_local(struct_schema.local_id); + auto status = factory.create(struct_schema, &struct_empty, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains no children"), std::string::npos); + + format::LocalColumnIndex struct_invalid = + format::LocalColumnIndex::partial_local(struct_schema.local_id); + struct_invalid.children.push_back(format::LocalColumnIndex::local(9999)); + status = factory.create(struct_schema, &struct_invalid, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains invalid child"), std::string::npos); + + const auto& list_schema = *_fields[find_field_idx("nullable_list_int_col")]; + format::LocalColumnIndex list_empty = + format::LocalColumnIndex::partial_local(list_schema.local_id); + status = factory.create(list_schema, &list_empty, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains no element"), std::string::npos); + + const auto& map_schema = *_fields[find_field_idx("nullable_map_int_struct_col")]; + const auto& value_schema = *map_schema.children[1]; + format::LocalColumnIndex map_invalid = + format::LocalColumnIndex::partial_local(map_schema.local_id); + map_invalid.children.push_back(format::LocalColumnIndex::local(value_schema.local_id)); + map_invalid.children.push_back(format::LocalColumnIndex::local(9999)); + status = factory.create(map_schema, &map_invalid, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains invalid child"), std::string::npos); +} + +TEST_F(ParquetColumnReaderTest, ReadSupportedComplexTypes) { + read_and_validate(find_field_idx("struct_col")); + read_and_validate(find_field_idx("nullable_struct_col")); + read_and_validate(find_field_idx("nullable_struct_decimal_col")); + read_and_validate(find_field_idx("list_int_col")); + read_and_validate(find_field_idx("nullable_list_int_col")); + read_and_validate(find_field_idx("required_nullable_list_int_col")); + read_and_validate(find_field_idx("nullable_list_struct_col")); + read_and_validate(find_field_idx("nullable_list_list_int_col")); + read_and_validate(find_field_idx("map_int_string_col")); + read_and_validate(find_field_idx("nullable_map_int_string_col")); + read_and_validate(find_field_idx("required_nullable_map_int_string_col")); + read_and_validate(find_field_idx("nullable_map_int_struct_col")); + read_and_validate(find_field_idx("nullable_map_int_list_col")); + read_and_validate(find_field_idx("nullable_list_map_int_string_col")); + read_and_validate(find_field_idx("nullable_map_int_map_int_string_col")); + read_and_validate(find_field_idx("nullable_list_struct_map_list_col")); + read_and_validate(find_field_idx("nullable_map_int_list_map_int_string_col")); +} + +TEST_F(ParquetColumnReaderTest, SkipThenRead) { + auto reader = create_reader(find_field_idx("int32_col")); + auto st = reader->skip(2); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + + const auto& int_values = assert_cast(*column); + ASSERT_EQ(int_values.size(), 2); + EXPECT_EQ(int_values.get_element(0), 30); + EXPECT_EQ(int_values.get_element(1), 40); +} + +TEST_F(ParquetColumnReaderTest, SelectReadsOnlySelectedRanges) { + auto reader = create_reader(find_field_idx("int32_col")); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 2); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& int_values = assert_cast(*column); + ASSERT_EQ(int_values.size(), 3); + EXPECT_EQ(int_values.get_element(0), 10); + EXPECT_EQ(int_values.get_element(1), 30); + EXPECT_EQ(int_values.get_element(2), 50); +} + +TEST_F(ParquetColumnReaderTest, ReadProjectedStructChildren) { + const auto field_idx = find_field_idx("struct_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& struct_schema = *_fields[field_idx]; + ASSERT_EQ(struct_schema.name, "struct_col"); + ASSERT_EQ(struct_schema.children.size(), 2); + + format::LocalColumnIndex projection; + projection.index = struct_schema.local_id; + projection.project_all_children = false; + format::LocalColumnIndex child_projection; + child_projection.index = struct_schema.children[1]->local_id; + projection.children.push_back(std::move(child_projection)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(struct_schema, &projection, &reader); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(remove_nullable(reader->type())->get_primitive_type(), TYPE_STRUCT); + const auto* projected_type = + assert_cast(remove_nullable(reader->type()).get()); + ASSERT_EQ(projected_type->get_elements().size(), 1); + EXPECT_EQ(projected_type->get_element_name(0), "b"); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + const auto& struct_column = assert_cast(*column); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& values = get_nullable_nested_column(struct_column.get_column(0)); + EXPECT_EQ(values.get_data_at(0).to_string(), "sa"); + EXPECT_EQ(values.get_data_at(4).to_string(), "se"); +} + +TEST_F(ParquetColumnReaderTest, ReadProjectedNullableStructChildren) { + const auto field_idx = find_field_idx("nullable_struct_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& struct_schema = *_fields[field_idx]; + ASSERT_EQ(struct_schema.name, "nullable_struct_col"); + ASSERT_EQ(struct_schema.children.size(), 2); + + format::LocalColumnIndex projection; + projection.index = struct_schema.local_id; + projection.project_all_children = false; + format::LocalColumnIndex child_projection; + child_projection.index = struct_schema.children[1]->local_id; + projection.children.push_back(std::move(child_projection)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(struct_schema, &projection, &reader); + ASSERT_TRUE(st.ok()) << st; + ASSERT_TRUE(reader->type()->is_nullable()); + ASSERT_EQ(remove_nullable(reader->type())->get_primitive_type(), TYPE_STRUCT); + const auto* projected_type = + assert_cast(remove_nullable(reader->type()).get()); + ASSERT_EQ(projected_type->get_elements().size(), 1); + EXPECT_EQ(projected_type->get_element_name(0), "b"); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + const auto& nullable_column = assert_cast(*column); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_TRUE(nullable_column.is_null_at(4)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& values = assert_cast(struct_column.get_column(0)); + const auto& nested_values = assert_cast(values.get_nested_column()); + EXPECT_FALSE(values.is_null_at(0)); + EXPECT_TRUE(values.is_null_at(2)); + EXPECT_FALSE(values.is_null_at(3)); + EXPECT_EQ(nested_values.get_data_at(0).to_string(), "nsa"); + EXPECT_EQ(nested_values.get_data_at(3).to_string(), "nsd"); +} + +TEST_F(ParquetColumnReaderTest, ReadProjectedListStructElementChildren) { + const auto field_idx = find_field_idx("nullable_list_struct_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& list_schema = *_fields[field_idx]; + ASSERT_EQ(list_schema.name, "nullable_list_struct_col"); + ASSERT_EQ(list_schema.children.size(), 1); + const auto& element_schema = *list_schema.children[0]; + ASSERT_EQ(element_schema.children.size(), 2); + + format::LocalColumnIndex projection; + projection.index = list_schema.local_id; + projection.project_all_children = false; + format::LocalColumnIndex element_projection; + element_projection.index = element_schema.local_id; + element_projection.project_all_children = false; + format::LocalColumnIndex child_projection; + child_projection.index = element_schema.children[1]->local_id; + element_projection.children.push_back(std::move(child_projection)); + projection.children.push_back(std::move(element_projection)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(list_schema, &projection, &reader); + ASSERT_TRUE(st.ok()) << st; + ASSERT_TRUE(reader->type()->is_nullable()); + const auto* array_type = + assert_cast(remove_nullable(reader->type()).get()); + const auto* element_type = assert_cast( + remove_nullable(array_type->get_nested_type()).get()); + ASSERT_EQ(element_type->get_elements().size(), 1); + EXPECT_EQ(element_type->get_element_name(0), "b"); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + + const auto& nullable_column = assert_cast(*column); + const auto& array_column = assert_cast(nullable_column.get_nested_column()); + const auto& elements = assert_cast(array_column.get_data()); + const auto& struct_column = assert_cast(elements.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& b_values = assert_cast(struct_column.get_column(0)); + const auto& b_data = assert_cast(b_values.get_nested_column()); + ASSERT_EQ(elements.size(), 5); + EXPECT_EQ(b_data.get_data_at(0).to_string(), "la"); + EXPECT_TRUE(b_values.is_null_at(1)); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_EQ(b_data.get_data_at(3).to_string(), "ld"); + EXPECT_EQ(b_data.get_data_at(4).to_string(), "le"); +} + +TEST_F(ParquetColumnReaderTest, ReadProjectedMapStructValueChildren) { + const auto field_idx = find_field_idx("nullable_map_int_struct_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& map_schema = *_fields[field_idx]; + ASSERT_EQ(map_schema.name, "nullable_map_int_struct_col"); + ASSERT_EQ(map_schema.children.size(), 2); + const auto& value_schema = *map_schema.children[1]; + ASSERT_EQ(value_schema.children.size(), 2); + + format::LocalColumnIndex projection; + projection.index = map_schema.local_id; + projection.project_all_children = false; + format::LocalColumnIndex value_projection; + value_projection.index = value_schema.local_id; + value_projection.project_all_children = false; + format::LocalColumnIndex child_projection; + child_projection.index = value_schema.children[1]->local_id; + value_projection.children.push_back(std::move(child_projection)); + projection.children.push_back(std::move(value_projection)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(map_schema, &projection, &reader); + ASSERT_TRUE(st.ok()) << st; + ASSERT_TRUE(reader->type()->is_nullable()); + const auto* map_type = assert_cast(remove_nullable(reader->type()).get()); + EXPECT_EQ(remove_nullable(map_type->get_key_type())->get_primitive_type(), TYPE_INT); + const auto* value_type = + assert_cast(remove_nullable(map_type->get_value_type()).get()); + ASSERT_EQ(value_type->get_elements().size(), 1); + EXPECT_EQ(value_type->get_element_name(0), "b"); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + + const auto& nullable_column = assert_cast(*column); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& keys = get_nullable_nested_column(map_column.get_keys()); + const auto& values = assert_cast(map_column.get_values()); + const auto& struct_column = assert_cast(values.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& b_values = assert_cast(struct_column.get_column(0)); + const auto& b_data = assert_cast(b_values.get_nested_column()); + ASSERT_EQ(keys.size(), 4); + ASSERT_EQ(values.size(), 4); + EXPECT_EQ(keys.get_element(0), 101); + EXPECT_EQ(keys.get_element(1), 102); + EXPECT_EQ(keys.get_element(3), 104); + EXPECT_EQ(b_data.get_data_at(0).to_string(), "ma"); + EXPECT_TRUE(b_values.is_null_at(1)); + EXPECT_TRUE(values.is_null_at(2)); + EXPECT_EQ(b_data.get_data_at(3).to_string(), "me"); +} + +TEST_F(ParquetColumnReaderTest, AllowsMapKeyWithValueProjection) { + const auto field_idx = find_field_idx("nullable_map_int_struct_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& map_schema = *_fields[field_idx]; + ASSERT_EQ(map_schema.children.size(), 2); + const auto& key_schema = *map_schema.children[0]; + const auto& value_schema = *map_schema.children[1]; + + auto projection = format::LocalColumnIndex::partial_local(map_schema.local_id); + projection.children.push_back(format::LocalColumnIndex::local(key_schema.local_id)); + projection.children.push_back(format::LocalColumnIndex::local(value_schema.local_id)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + const auto st = factory.create(map_schema, &projection, &reader); + ASSERT_TRUE(st.ok()) << st; + ASSERT_NE(reader, nullptr); +} + +TEST_F(ParquetColumnReaderTest, RejectMapKeyOnlyProjection) { + const auto field_idx = find_field_idx("nullable_map_int_struct_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& map_schema = *_fields[field_idx]; + ASSERT_EQ(map_schema.children.size(), 2); + const auto& key_schema = *map_schema.children[0]; + + auto projection = format::LocalColumnIndex::partial_local(map_schema.local_id); + projection.children.push_back(format::LocalColumnIndex::local(key_schema.local_id)); + + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + const auto st = factory.create(map_schema, &projection, &reader); + ASSERT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find("contains no value"), std::string::npos); +} + +TEST_F(ParquetColumnReaderTest, ReadProjectedStructListChildOnly) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& struct_schema = *_fields[field_idx]; + ASSERT_EQ(struct_schema.name, "nullable_struct_list_col"); + ASSERT_EQ(struct_schema.children.size(), 2); + + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + ASSERT_TRUE(reader->type()->is_nullable()); + const auto* projected_type = + assert_cast(remove_nullable(reader->type()).get()); + ASSERT_EQ(projected_type->get_elements().size(), 1); + EXPECT_EQ(projected_type->get_element_name(0), "xs"); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& xs_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(xs_nullable.size(), ROW_COUNT); + EXPECT_FALSE(xs_nullable.is_null_at(0)); + EXPECT_FALSE(xs_nullable.is_null_at(2)); + EXPECT_TRUE(xs_nullable.is_null_at(3)); + EXPECT_FALSE(xs_nullable.is_null_at(4)); + const auto& xs_array = assert_cast(xs_nullable.get_nested_column()); + const auto& offsets = xs_array.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 2); + EXPECT_EQ(offsets[4], 4); + const auto& elements = assert_cast(xs_array.get_data()); + const auto& values = assert_cast(elements.get_nested_column()); + ASSERT_EQ(elements.size(), 4); + EXPECT_EQ(values.get_element(0), 1); + EXPECT_EQ(values.get_element(1), 2); + EXPECT_TRUE(elements.is_null_at(2)); + EXPECT_EQ(values.get_element(3), 5); +} + +TEST_F(ParquetColumnReaderTest, SkipProjectedStructListChildOnlyThenRead) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& xs_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(xs_nullable.size(), 3); + EXPECT_FALSE(xs_nullable.is_null_at(1)); + EXPECT_TRUE(xs_nullable.is_null_at(2)); + const auto& xs_array = assert_cast(xs_nullable.get_nested_column()); + const auto& offsets = xs_array.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 0); +} + +TEST_F(ParquetColumnReaderTest, SelectProjectedStructListChildOnly) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& xs_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(xs_nullable.size(), 3); + EXPECT_FALSE(xs_nullable.is_null_at(0)); + EXPECT_TRUE(xs_nullable.is_null_at(1)); + EXPECT_FALSE(xs_nullable.is_null_at(2)); + const auto& xs_array = assert_cast(xs_nullable.get_nested_column()); + const auto& offsets = xs_array.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 4); +} + +TEST_F(ParquetColumnReaderTest, ReadProjectedStructMapChildOnly) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + ASSERT_LT(field_idx, _fields.size()); + const auto& struct_schema = *_fields[field_idx]; + ASSERT_EQ(struct_schema.name, "nullable_struct_map_col"); + ASSERT_EQ(struct_schema.children.size(), 2); + + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + ASSERT_TRUE(reader->type()->is_nullable()); + const auto* projected_type = + assert_cast(remove_nullable(reader->type()).get()); + ASSERT_EQ(projected_type->get_elements().size(), 1); + EXPECT_EQ(projected_type->get_element_name(0), "kv"); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& kv_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(kv_nullable.size(), ROW_COUNT); + EXPECT_FALSE(kv_nullable.is_null_at(0)); + EXPECT_FALSE(kv_nullable.is_null_at(2)); + EXPECT_TRUE(kv_nullable.is_null_at(3)); + EXPECT_FALSE(kv_nullable.is_null_at(4)); + const auto& kv_map = assert_cast(kv_nullable.get_nested_column()); + const auto& offsets = kv_map.get_offsets(); + ASSERT_EQ(offsets.size(), ROW_COUNT); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 2); + EXPECT_EQ(offsets[3], 2); + EXPECT_EQ(offsets[4], 3); + const auto& keys = get_nullable_nested_column(kv_map.get_keys()); + const auto& values = assert_cast(kv_map.get_values()); + const auto& value_data = assert_cast(values.get_nested_column()); + ASSERT_EQ(keys.size(), 3); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 5); + EXPECT_EQ(value_data.get_data_at(0).to_string(), "one"); + EXPECT_TRUE(values.is_null_at(1)); + EXPECT_EQ(value_data.get_data_at(2).to_string(), "five"); +} + +TEST_F(ParquetColumnReaderTest, NullableStructUsesListChildAsShapeSource) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); +} + +TEST_F(ParquetColumnReaderTest, NullableStructUsesMapChildAsShapeSource) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); +} + +TEST_F(ParquetColumnReaderTest, NullableStructUsesNestedStructComplexChildAsShapeSource) { + const auto field_idx = find_field_idx("nullable_struct_nested_struct_list_col"); + auto reader = create_projected_grandchild_reader(field_idx, 0, 0); + ASSERT_NE(reader, nullptr); + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_FALSE(nullable_column.is_null_at(4)); + + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& nested_nullable = assert_cast(struct_column.get_column(0)); + EXPECT_FALSE(nested_nullable.is_null_at(0)); + EXPECT_TRUE(nested_nullable.is_null_at(2)); + EXPECT_FALSE(nested_nullable.is_null_at(3)); + EXPECT_FALSE(nested_nullable.is_null_at(4)); +} + +TEST_F(ParquetColumnReaderTest, SkipProjectedStructMapChildOnlyThenRead) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& kv_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(kv_nullable.size(), 3); + EXPECT_FALSE(kv_nullable.is_null_at(1)); + EXPECT_TRUE(kv_nullable.is_null_at(2)); + const auto& kv_map = assert_cast(kv_nullable.get_nested_column()); + const auto& offsets = kv_map.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 0); +} + +TEST_F(ParquetColumnReaderTest, SelectProjectedStructMapChildOnly) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + auto reader = create_projected_child_reader(field_idx, 1); + ASSERT_NE(reader, nullptr); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(struct_column.get_columns().size(), 1); + const auto& kv_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(kv_nullable.size(), 3); + EXPECT_FALSE(kv_nullable.is_null_at(0)); + EXPECT_TRUE(kv_nullable.is_null_at(1)); + EXPECT_FALSE(kv_nullable.is_null_at(2)); + const auto& kv_map = assert_cast(kv_nullable.get_nested_column()); + const auto& offsets = kv_map.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 3); + const auto& keys = get_nullable_nested_column(kv_map.get_keys()); + ASSERT_EQ(keys.size(), 3); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 5); +} + +TEST_F(ParquetColumnReaderTest, ReadListWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_list_int_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipListWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_list_int_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + const auto& array_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 2); +} + +TEST_F(ParquetColumnReaderTest, SelectListWithOverflow) { + const auto field_idx = find_field_idx("nullable_list_int_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& array_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 4); + EXPECT_EQ(offsets[2], 5); +} + +TEST_F(ParquetColumnReaderTest, ReadStructListWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipStructListWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& xs_nullable = assert_cast(struct_column.get_column(1)); + ASSERT_EQ(xs_nullable.size(), 3); + EXPECT_FALSE(xs_nullable.is_null_at(1)); + EXPECT_TRUE(xs_nullable.is_null_at(2)); + const auto& xs_array = assert_cast(xs_nullable.get_nested_column()); + const auto& offsets = xs_array.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 0); +} + +TEST_F(ParquetColumnReaderTest, SelectStructListWithOverflow) { + const auto field_idx = find_field_idx("nullable_struct_list_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& a_values = get_nullable_nested_column(struct_column.get_column(0)); + EXPECT_EQ(a_values.get_element(0), 301); + EXPECT_EQ(a_values.get_element(1), 304); + EXPECT_EQ(a_values.get_element(2), 305); + const auto& xs_nullable = assert_cast(struct_column.get_column(1)); + ASSERT_EQ(xs_nullable.size(), 3); + EXPECT_FALSE(xs_nullable.is_null_at(0)); + EXPECT_TRUE(xs_nullable.is_null_at(1)); + EXPECT_FALSE(xs_nullable.is_null_at(2)); + const auto& xs_array = assert_cast(xs_nullable.get_nested_column()); + const auto& offsets = xs_array.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 4); +} + +TEST_F(ParquetColumnReaderTest, ReadStructMapWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipStructMapWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& kv_nullable = assert_cast(struct_column.get_column(1)); + ASSERT_EQ(kv_nullable.size(), 3); + EXPECT_FALSE(kv_nullable.is_null_at(1)); + EXPECT_TRUE(kv_nullable.is_null_at(2)); + const auto& kv_map = assert_cast(kv_nullable.get_nested_column()); + const auto& offsets = kv_map.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 0); +} + +TEST_F(ParquetColumnReaderTest, SelectStructMapWithOverflow) { + const auto field_idx = find_field_idx("nullable_struct_map_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& a_values = get_nullable_nested_column(struct_column.get_column(0)); + EXPECT_EQ(a_values.get_element(0), 401); + EXPECT_EQ(a_values.get_element(1), 404); + EXPECT_EQ(a_values.get_element(2), 405); + const auto& kv_nullable = assert_cast(struct_column.get_column(1)); + ASSERT_EQ(kv_nullable.size(), 3); + EXPECT_FALSE(kv_nullable.is_null_at(0)); + EXPECT_TRUE(kv_nullable.is_null_at(1)); + EXPECT_FALSE(kv_nullable.is_null_at(2)); + const auto& kv_map = assert_cast(kv_nullable.get_nested_column()); + const auto& offsets = kv_map.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 2); + EXPECT_EQ(offsets[2], 3); + const auto& keys = get_nullable_nested_column(kv_map.get_keys()); + const auto& values = assert_cast(kv_map.get_values()); + const auto& value_data = assert_cast(values.get_nested_column()); + ASSERT_EQ(keys.size(), 3); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 5); + EXPECT_EQ(value_data.get_data_at(0).to_string(), "one"); + EXPECT_TRUE(values.is_null_at(1)); + EXPECT_EQ(value_data.get_data_at(2).to_string(), "five"); +} + +TEST_F(ParquetColumnReaderTest, ReadListStructWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_list_struct_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipListStructWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_list_struct_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + const auto& array_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 2); +} + +TEST_F(ParquetColumnReaderTest, SelectListStructWithOverflow) { + const auto field_idx = find_field_idx("nullable_list_struct_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& array_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = array_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 4); + EXPECT_EQ(offsets[2], 5); +} + +TEST_F(ParquetColumnReaderTest, ReadListListWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_list_list_int_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipListListWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_list_list_int_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + const auto& outer_array = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 3); + EXPECT_EQ(outer_offsets[0], 0); + EXPECT_EQ(outer_offsets[1], 0); + EXPECT_EQ(outer_offsets[2], 1); + + const auto& inner_nullable = assert_cast(outer_array.get_data()); + ASSERT_EQ(inner_nullable.size(), 1); + EXPECT_FALSE(inner_nullable.is_null_at(0)); + const auto& inner_array = assert_cast(inner_nullable.get_nested_column()); + const auto& inner_offsets = inner_array.get_offsets(); + ASSERT_EQ(inner_offsets.size(), 1); + EXPECT_EQ(inner_offsets[0], 1); +} + +TEST_F(ParquetColumnReaderTest, SelectListListWithOverflow) { + const auto field_idx = find_field_idx("nullable_list_list_int_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& outer_array = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 3); + EXPECT_EQ(outer_offsets[0], 4); + EXPECT_EQ(outer_offsets[1], 5); + EXPECT_EQ(outer_offsets[2], 7); + + const auto& inner_nullable = assert_cast(outer_array.get_data()); + ASSERT_EQ(inner_nullable.size(), 7); + EXPECT_TRUE(inner_nullable.is_null_at(2)); + const auto& inner_array = assert_cast(inner_nullable.get_nested_column()); + const auto& inner_offsets = inner_array.get_offsets(); + ASSERT_EQ(inner_offsets.size(), 7); + EXPECT_EQ(inner_offsets[0], 2); + EXPECT_EQ(inner_offsets[3], 4); + EXPECT_EQ(inner_offsets[4], 5); + EXPECT_EQ(inner_offsets[6], 7); +} + +TEST_F(ParquetColumnReaderTest, ReadMapWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_map_int_string_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipMapWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_map_int_string_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 1); +} + +TEST_F(ParquetColumnReaderTest, SelectMapWithOverflow) { + const auto field_idx = find_field_idx("nullable_map_int_string_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 3); + EXPECT_EQ(offsets[2], 4); +} + +TEST_F(ParquetColumnReaderTest, ReadMapStructWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_map_int_struct_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipMapStructWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_map_int_struct_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + EXPECT_EQ(offsets[2], 1); +} + +TEST_F(ParquetColumnReaderTest, SelectMapStructWithOverflow) { + const auto field_idx = find_field_idx("nullable_map_int_struct_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& offsets = map_column.get_offsets(); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 2); + EXPECT_EQ(offsets[1], 3); + EXPECT_EQ(offsets[2], 4); +} + +TEST_F(ParquetColumnReaderTest, ReadMapListWithOverflowAcrossChunks) { + const auto field_idx = find_field_idx("nullable_map_int_list_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipMapListWithOverflowThenRead) { + const auto field_idx = find_field_idx("nullable_map_int_list_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(3, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 3); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_TRUE(nullable_column.is_null_at(0)); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), 3); + EXPECT_EQ(map_offsets[0], 0); + EXPECT_EQ(map_offsets[1], 0); + EXPECT_EQ(map_offsets[2], 2); + + const auto& values = assert_cast(map_column.get_values()); + ASSERT_EQ(values.size(), 2); + EXPECT_TRUE(values.is_null_at(0)); + EXPECT_FALSE(values.is_null_at(1)); + const auto& list_column = assert_cast(values.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 2); + EXPECT_EQ(list_offsets[0], 0); + EXPECT_EQ(list_offsets[1], 2); +} + +TEST_F(ParquetColumnReaderTest, SelectMapListWithOverflow) { + const auto field_idx = find_field_idx("nullable_map_int_list_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& map_column = assert_cast(nullable_column.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), 3); + EXPECT_EQ(map_offsets[0], 2); + EXPECT_EQ(map_offsets[1], 4); + EXPECT_EQ(map_offsets[2], 5); + + const auto& values = assert_cast(map_column.get_values()); + ASSERT_EQ(values.size(), 5); + EXPECT_FALSE(values.is_null_at(0)); + EXPECT_TRUE(values.is_null_at(2)); + EXPECT_FALSE(values.is_null_at(4)); + const auto& list_column = assert_cast(values.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 5); + EXPECT_EQ(list_offsets[0], 2); + EXPECT_EQ(list_offsets[1], 2); + EXPECT_EQ(list_offsets[2], 2); + EXPECT_EQ(list_offsets[3], 4); + EXPECT_EQ(list_offsets[4], 5); +} + +TEST_F(ParquetColumnReaderTest, ReadDeepListStructMapListAcrossChunks) { + const auto field_idx = find_field_idx("nullable_list_struct_map_list_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(1, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 1); + st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipDeepListStructMapListThenRead) { + const auto field_idx = find_field_idx("nullable_list_struct_map_list_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(4, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 4); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 4); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + + const auto& outer_array = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 4); + EXPECT_EQ(outer_offsets[0], 0); + EXPECT_EQ(outer_offsets[1], 0); + EXPECT_EQ(outer_offsets[2], 2); + EXPECT_EQ(outer_offsets[3], 3); + + const auto& struct_values = assert_cast(outer_array.get_data()); + ASSERT_EQ(struct_values.size(), 3); + EXPECT_FALSE(struct_values.is_null_at(0)); + EXPECT_FALSE(struct_values.is_null_at(1)); + EXPECT_FALSE(struct_values.is_null_at(2)); + const auto& struct_column = assert_cast(struct_values.get_nested_column()); + const auto& map_values = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(map_values.size(), 3); + EXPECT_TRUE(map_values.is_null_at(0)); + EXPECT_FALSE(map_values.is_null_at(1)); + EXPECT_FALSE(map_values.is_null_at(2)); + + const auto& map_column = assert_cast(map_values.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), 3); + EXPECT_EQ(map_offsets[0], 0); + EXPECT_EQ(map_offsets[1], 0); + EXPECT_EQ(map_offsets[2], 2); + const auto& keys = get_nullable_nested_column(map_column.get_keys()); + ASSERT_EQ(keys.size(), 2); + EXPECT_EQ(keys.get_element(0), 3); + EXPECT_EQ(keys.get_element(1), 4); + const auto& lists = assert_cast(map_column.get_values()); + ASSERT_EQ(lists.size(), 2); + EXPECT_TRUE(lists.is_null_at(0)); + EXPECT_FALSE(lists.is_null_at(1)); + const auto& list_column = assert_cast(lists.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 2); + EXPECT_EQ(list_offsets[0], 0); + EXPECT_EQ(list_offsets[1], 1); +} + +TEST_F(ParquetColumnReaderTest, SelectDeepListStructMapList) { + const auto field_idx = find_field_idx("nullable_list_struct_map_list_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& outer_array = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 3); + EXPECT_EQ(outer_offsets[0], 2); + EXPECT_EQ(outer_offsets[1], 4); + EXPECT_EQ(outer_offsets[2], 5); + + const auto& struct_values = assert_cast(outer_array.get_data()); + ASSERT_EQ(struct_values.size(), 5); + EXPECT_FALSE(struct_values.is_null_at(0)); + EXPECT_TRUE(struct_values.is_null_at(1)); + EXPECT_FALSE(struct_values.is_null_at(2)); + EXPECT_FALSE(struct_values.is_null_at(3)); + EXPECT_FALSE(struct_values.is_null_at(4)); + const auto& struct_column = assert_cast(struct_values.get_nested_column()); + const auto& map_values = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(map_values.size(), 5); + EXPECT_FALSE(map_values.is_null_at(0)); + EXPECT_TRUE(map_values.is_null_at(1)); + EXPECT_TRUE(map_values.is_null_at(2)); + EXPECT_FALSE(map_values.is_null_at(3)); + EXPECT_FALSE(map_values.is_null_at(4)); + const auto& map_column = assert_cast(map_values.get_nested_column()); + const auto& map_offsets = map_column.get_offsets(); + ASSERT_EQ(map_offsets.size(), 5); + EXPECT_EQ(map_offsets[0], 2); + EXPECT_EQ(map_offsets[1], 2); + EXPECT_EQ(map_offsets[2], 2); + EXPECT_EQ(map_offsets[3], 2); + EXPECT_EQ(map_offsets[4], 4); +} + +TEST_F(ParquetColumnReaderTest, ReadDeepMapListMapAcrossChunks) { + const auto field_idx = find_field_idx("nullable_map_int_list_map_int_string_col"); + auto reader = create_reader(field_idx); + MutableColumnPtr column = reader->type()->create_column(); + + int64_t rows_read = 0; + auto st = reader->read(1, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 1); + st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + st = reader->read(2, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 2); + + _expected_by_field[field_idx](*_fields[field_idx], *column); +} + +TEST_F(ParquetColumnReaderTest, SkipDeepMapListMapThenRead) { + const auto field_idx = find_field_idx("nullable_map_int_list_map_int_string_col"); + auto reader = create_reader(field_idx); + auto st = reader->skip(1); + ASSERT_TRUE(st.ok()) << st; + + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + st = reader->read(4, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, 4); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 4); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + const auto& outer_map = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_map.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 4); + EXPECT_EQ(outer_offsets[0], 0); + EXPECT_EQ(outer_offsets[1], 0); + EXPECT_EQ(outer_offsets[2], 2); + EXPECT_EQ(outer_offsets[3], 3); + const auto& outer_keys = get_nullable_nested_column(outer_map.get_keys()); + ASSERT_EQ(outer_keys.size(), 3); + EXPECT_EQ(outer_keys.get_element(0), 30); + EXPECT_EQ(outer_keys.get_element(1), 40); + EXPECT_EQ(outer_keys.get_element(2), 50); + + const auto& lists = assert_cast(outer_map.get_values()); + ASSERT_EQ(lists.size(), 3); + EXPECT_TRUE(lists.is_null_at(0)); + EXPECT_FALSE(lists.is_null_at(1)); + EXPECT_FALSE(lists.is_null_at(2)); + const auto& list_column = assert_cast(lists.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 3); + EXPECT_EQ(list_offsets[0], 0); + EXPECT_EQ(list_offsets[1], 1); + EXPECT_EQ(list_offsets[2], 3); + const auto& inner_maps = assert_cast(list_column.get_data()); + ASSERT_EQ(inner_maps.size(), 3); + EXPECT_FALSE(inner_maps.is_null_at(0)); + EXPECT_TRUE(inner_maps.is_null_at(1)); + EXPECT_FALSE(inner_maps.is_null_at(2)); +} + +TEST_F(ParquetColumnReaderTest, SelectDeepMapListMap) { + const auto field_idx = find_field_idx("nullable_map_int_list_map_int_string_col"); + auto reader = create_reader(field_idx); + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 3); + selection.set_index(2, 4); + + MutableColumnPtr column = reader->type()->create_column(); + auto st = reader->select(selection, 3, ROW_COUNT, column); + ASSERT_TRUE(st.ok()) << st; + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 3); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + const auto& outer_map = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_map.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 3); + EXPECT_EQ(outer_offsets[0], 2); + EXPECT_EQ(outer_offsets[1], 4); + EXPECT_EQ(outer_offsets[2], 5); + const auto& outer_keys = get_nullable_nested_column(outer_map.get_keys()); + ASSERT_EQ(outer_keys.size(), 5); + EXPECT_EQ(outer_keys.get_element(0), 10); + EXPECT_EQ(outer_keys.get_element(1), 20); + EXPECT_EQ(outer_keys.get_element(2), 30); + EXPECT_EQ(outer_keys.get_element(3), 40); + EXPECT_EQ(outer_keys.get_element(4), 50); + + const auto& lists = assert_cast(outer_map.get_values()); + ASSERT_EQ(lists.size(), 5); + EXPECT_FALSE(lists.is_null_at(0)); + EXPECT_FALSE(lists.is_null_at(1)); + EXPECT_TRUE(lists.is_null_at(2)); + EXPECT_FALSE(lists.is_null_at(3)); + EXPECT_FALSE(lists.is_null_at(4)); + const auto& list_column = assert_cast(lists.get_nested_column()); + const auto& list_offsets = list_column.get_offsets(); + ASSERT_EQ(list_offsets.size(), 5); + EXPECT_EQ(list_offsets[0], 3); + EXPECT_EQ(list_offsets[1], 3); + EXPECT_EQ(list_offsets[2], 3); + EXPECT_EQ(list_offsets[3], 4); + EXPECT_EQ(list_offsets[4], 6); +} + +} // namespace +} // namespace doris::format::parquet diff --git a/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp b/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp new file mode 100644 index 00000000000000..0d0f9a2f8567cc --- /dev/null +++ b/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp @@ -0,0 +1,506 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/reader/parquet_leaf_reader.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" + +namespace doris::format::parquet { +namespace { + +std::shared_ptr fixed_binary_array(const std::vector& values, + int byte_width) { + auto type = arrow::fixed_size_binary(byte_width); + arrow::FixedSizeBinaryBuilder builder(type, arrow::default_memory_pool()); + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(reinterpret_cast(value.data())).ok()); + } + std::shared_ptr array; + EXPECT_TRUE(builder.Finish(&array).ok()); + return array; +} + +ParquetLeafReader make_leaf_reader(ParquetTypeDescriptor descriptor, DataTypePtr type) { + return ParquetLeafReader(nullptr, descriptor, std::move(type), "leaf", nullptr); +} + +struct CapturedDecodedView { + DecodedValueKind value_kind = DecodedValueKind::INT32; + DecodedTimeUnit time_unit = DecodedTimeUnit::UNKNOWN; + int64_t row_count = 0; + int decimal_precision = -1; + int decimal_scale = -1; + int fixed_length = -1; + bool timestamp_is_adjusted_to_utc = false; + bool enable_strict_mode = false; + const cctz::time_zone* timezone = nullptr; + bool null_map_is_null = true; + std::vector null_map; + std::vector fixed_values; + std::vector binary_values; + std::vector owned_binary_values; +}; + +ParquetLeafReader make_spy_leaf_reader(ParquetTypeDescriptor descriptor, DataTypePtr type, + CapturedDecodedView* captured, + const cctz::time_zone* timezone = nullptr, + bool enable_strict_mode = false) { + auto appender = [captured](MutableColumnPtr&, const DecodedColumnView& view) { + captured->value_kind = view.value_kind; + captured->time_unit = view.time_unit; + captured->row_count = view.row_count; + captured->decimal_precision = view.decimal_precision; + captured->decimal_scale = view.decimal_scale; + captured->fixed_length = view.fixed_length; + captured->timestamp_is_adjusted_to_utc = view.timestamp_is_adjusted_to_utc; + captured->enable_strict_mode = view.enable_strict_mode; + captured->timezone = view.timezone; + captured->null_map_is_null = view.null_map == nullptr; + captured->null_map.clear(); + if (view.null_map != nullptr) { + captured->null_map.assign(view.null_map, view.null_map + view.row_count); + } + captured->fixed_values.clear(); + if (view.values != nullptr && view.value_kind == DecodedValueKind::INT64) { + captured->fixed_values.assign(view.values, view.values + view.row_count * 8); + } else if (view.values != nullptr && view.value_kind == DecodedValueKind::FLOAT) { + captured->fixed_values.assign(view.values, view.values + view.row_count * 4); + } else if (view.values != nullptr && view.value_kind == DecodedValueKind::INT32) { + captured->fixed_values.assign(view.values, view.values + view.row_count * 4); + } + captured->binary_values.clear(); + captured->owned_binary_values.clear(); + if (view.binary_values != nullptr) { + captured->owned_binary_values.reserve(view.binary_values->size()); + for (const auto& value : *view.binary_values) { + captured->owned_binary_values.emplace_back( + value.data == nullptr ? std::string() + : std::string(value.data, value.size)); + } + captured->binary_values.reserve(captured->owned_binary_values.size()); + for (const auto& value : captured->owned_binary_values) { + captured->binary_values.emplace_back(value.data(), value.size()); + } + } + return Status::OK(); + }; + return ParquetLeafReader(nullptr, descriptor, std::move(type), "leaf", nullptr, {}, timezone, + enable_strict_mode, std::move(appender)); +} + +} // namespace + +struct ParquetLeafReaderTestAccess { + static ParquetLeafBatch make_fixed_batch(const std::vector& def_levels, + const std::vector& rep_levels, + const std::vector& values, + bool read_dense_for_nullable = false) { + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::INT32; + batch._consumed_level_count = static_cast(def_levels.size()); + batch._decoded_level_count = static_cast(def_levels.size()); + batch._values_written = static_cast(values.size()); + batch._def_levels = def_levels.data(); + batch._rep_levels = rep_levels.data(); + batch._fixed_values = reinterpret_cast(values.data()); + batch._read_dense_for_nullable = read_dense_for_nullable; + return batch; + } + + static Status build_nested_batch(const ParquetLeafReader& reader, + const ParquetLeafBatch& leaf_batch, int64_t records_read, + int16_t value_slot_definition_level, + int16_t value_slot_repetition_level, + ParquetNestedScalarBatch* nested_batch) { + return reader.build_nested_batch_from_leaf_batch(leaf_batch, records_read, + value_slot_definition_level, nested_batch, + value_slot_repetition_level); + } +}; + +std::shared_ptr<::parquet::ColumnDescriptor> int32_column_descriptor(int16_t max_definition_level, + int16_t max_repetition_level) { + auto node = ::parquet::schema::PrimitiveNode::Make("leaf", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32); + return std::make_shared<::parquet::ColumnDescriptor>(node, max_definition_level, + max_repetition_level); +} + +ParquetLeafReader make_nested_leaf_reader( + const std::shared_ptr<::parquet::ColumnDescriptor>& descriptor, DataTypePtr type) { + ParquetTypeDescriptor type_descriptor; + type_descriptor.physical_type = ::parquet::Type::INT32; + type_descriptor.doris_type = type; + return ParquetLeafReader(descriptor.get(), type_descriptor, std::move(type), "nested_leaf", + nullptr); +} + +TEST(ParquetLeafReaderTest, DenseNullableFixedValuesAreSpacedBeforeSerde) { + ParquetTypeDescriptor descriptor; + descriptor.physical_type = ::parquet::Type::INT32; + auto type = make_nullable(std::make_shared()); + auto reader = make_leaf_reader(descriptor, type); + + const std::vector compact_values = {10, 30, 50}; + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::INT32; + batch._fixed_values = reinterpret_cast(compact_values.data()); + batch._values_written = compact_values.size(); + batch._read_dense_for_nullable = true; + + const NullMap null_map = {0, 1, 0, 1, 0}; + auto column = type->create_column(); + auto status = reader.append_values(batch, 5, &null_map, column); + ASSERT_TRUE(status.ok()) << status; + + const auto& nullable = assert_cast(*column); + ASSERT_EQ(nullable.size(), 5); + EXPECT_FALSE(nullable.is_null_at(0)); + EXPECT_TRUE(nullable.is_null_at(1)); + EXPECT_FALSE(nullable.is_null_at(2)); + EXPECT_TRUE(nullable.is_null_at(3)); + EXPECT_FALSE(nullable.is_null_at(4)); + const auto& nested = assert_cast(nullable.get_nested_column()); + EXPECT_EQ(nested.get_element(0), 10); + EXPECT_EQ(nested.get_element(2), 30); + EXPECT_EQ(nested.get_element(4), 50); +} + +TEST(ParquetLeafReaderTest, DenseNullableFixedValuesRejectCountMismatch) { + ParquetTypeDescriptor descriptor; + descriptor.physical_type = ::parquet::Type::INT32; + auto type = make_nullable(std::make_shared()); + auto reader = make_leaf_reader(descriptor, type); + + const std::vector compact_values = {10, 30}; + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::INT32; + batch._fixed_values = reinterpret_cast(compact_values.data()); + batch._values_written = compact_values.size(); + batch._read_dense_for_nullable = true; + + const NullMap null_map = {0, 1, 0, 1, 0}; + auto column = type->create_column(); + auto status = reader.append_values(batch, 5, &null_map, column); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Invalid dense nullable parquet values"), std::string::npos); +} + +TEST(ParquetLeafReaderTest, Float16BinaryValuesAreConvertedToFloat) { + ParquetTypeDescriptor descriptor; + descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY; + descriptor.extra_type_info = ParquetExtraTypeInfo::FLOAT16; + descriptor.fixed_length = 2; + auto type = std::make_shared(); + auto reader = make_leaf_reader(descriptor, type); + + auto half = [](uint16_t value) { + std::string bytes(sizeof(value), '\0'); + memcpy(bytes.data(), &value, sizeof(value)); + return bytes; + }; + + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::FIXED_BINARY; + batch._binary_chunks = {fixed_binary_array( + {half(0x0000), half(0x8000), half(0x3E00), half(0x0001), half(0x7E00)}, 2)}; + batch._values_written = 5; + + auto column = type->create_column(); + auto status = reader.append_values(batch, 5, nullptr, column); + ASSERT_TRUE(status.ok()) << status; + + const auto& floats = assert_cast(*column); + ASSERT_EQ(floats.size(), 5); + EXPECT_FLOAT_EQ(floats.get_element(0), 0.0F); + EXPECT_TRUE(std::signbit(floats.get_element(1))); + EXPECT_FLOAT_EQ(floats.get_element(2), 1.5F); + EXPECT_NEAR(floats.get_element(3), 5.9604645e-8F, 1e-12F); + EXPECT_TRUE(std::isnan(floats.get_element(4))); +} + +TEST(ParquetLeafReaderTest, BinaryDenseNullableValuesAreSpacedWithNullRefs) { + ParquetTypeDescriptor descriptor; + descriptor.physical_type = ::parquet::Type::BYTE_ARRAY; + auto type = make_nullable(std::make_shared()); + auto reader = make_leaf_reader(descriptor, type); + + arrow::BinaryBuilder builder; + ASSERT_TRUE(builder.Append("aa").ok()); + ASSERT_TRUE(builder.Append("cc").ok()); + ASSERT_TRUE(builder.Append("ee").ok()); + std::shared_ptr array; + ASSERT_TRUE(builder.Finish(&array).ok()); + + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::BINARY; + batch._binary_chunks = {array}; + batch._values_written = 3; + batch._read_dense_for_nullable = true; + + const NullMap null_map = {0, 1, 0, 1, 0}; + auto column = type->create_column(); + auto status = reader.append_values(batch, 5, &null_map, column); + ASSERT_TRUE(status.ok()) << status; + + const auto& nullable = assert_cast(*column); + const auto& strings = assert_cast(nullable.get_nested_column()); + ASSERT_EQ(nullable.size(), 5); + EXPECT_EQ(strings.get_data_at(0).to_string(), "aa"); + EXPECT_TRUE(nullable.is_null_at(1)); + EXPECT_EQ(strings.get_data_at(2).to_string(), "cc"); + EXPECT_TRUE(nullable.is_null_at(3)); + EXPECT_EQ(strings.get_data_at(4).to_string(), "ee"); +} + +TEST(ParquetLeafReaderTest, BinaryDenseNullableRejectsCountMismatch) { + ParquetTypeDescriptor descriptor; + descriptor.physical_type = ::parquet::Type::BYTE_ARRAY; + auto type = make_nullable(std::make_shared()); + auto reader = make_leaf_reader(descriptor, type); + + arrow::BinaryBuilder builder; + ASSERT_TRUE(builder.Append("only_one").ok()); + std::shared_ptr array; + ASSERT_TRUE(builder.Finish(&array).ok()); + + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::BINARY; + batch._binary_chunks = {array}; + batch._values_written = 1; + batch._read_dense_for_nullable = true; + + const NullMap null_map = {0, 1, 0}; + auto column = type->create_column(); + auto status = reader.append_values(batch, 3, &null_map, column); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Invalid dense nullable parquet binary values"), + std::string::npos); +} + +TEST(ParquetLeafReaderTest, DecodedColumnViewCarriesDescriptorSessionAndNullMapFields) { + ParquetTypeDescriptor descriptor; + descriptor.physical_type = ::parquet::Type::INT64; + descriptor.time_unit = ParquetTimeUnit::NANOS; + descriptor.decimal_precision = 18; + descriptor.decimal_scale = 4; + descriptor.fixed_length = 12; + descriptor.timestamp_is_adjusted_to_utc = true; + auto type = make_nullable(std::make_shared()); + cctz::time_zone shanghai; + ASSERT_TRUE(cctz::load_time_zone("Asia/Shanghai", &shanghai)); + + CapturedDecodedView captured; + auto reader = make_spy_leaf_reader(descriptor, type, &captured, &shanghai, true); + const std::vector values = {100, 200, 300}; + ParquetLeafBatch batch; + batch._value_kind = DecodedValueKind::INT64; + batch._fixed_values = reinterpret_cast(values.data()); + batch._values_written = values.size(); + + const NullMap null_map = {0, 1, 0}; + auto column = type->create_column(); + ASSERT_TRUE(reader.append_values(batch, 3, &null_map, column).ok()); + EXPECT_EQ(captured.value_kind, DecodedValueKind::INT64); + EXPECT_EQ(captured.time_unit, DecodedTimeUnit::NANOS); + EXPECT_EQ(captured.row_count, 3); + EXPECT_EQ(captured.decimal_precision, 18); + EXPECT_EQ(captured.decimal_scale, 4); + EXPECT_EQ(captured.fixed_length, 12); + EXPECT_TRUE(captured.timestamp_is_adjusted_to_utc); + EXPECT_TRUE(captured.enable_strict_mode); + EXPECT_EQ(captured.timezone, &shanghai); + EXPECT_FALSE(captured.null_map_is_null); + EXPECT_EQ(captured.null_map, std::vector({0, 1, 0})); + + auto required_column = type->create_column(); + ASSERT_TRUE(reader.append_values(batch, 3, nullptr, required_column).ok()); + EXPECT_TRUE(captured.null_map_is_null); + + const NullMap empty_null_map; + ASSERT_TRUE(reader.append_values(batch, 3, &empty_null_map, required_column).ok()); + EXPECT_TRUE(captured.null_map_is_null); +} + +TEST(ParquetLeafReaderTest, DecodedColumnViewCapturesBinaryFixedLengthAndFloat16Override) { + ParquetTypeDescriptor binary_descriptor; + binary_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY; + binary_descriptor.fixed_length = 4; + auto type = std::make_shared(); + + CapturedDecodedView binary_view; + auto binary_reader = make_spy_leaf_reader(binary_descriptor, type, &binary_view); + ParquetLeafBatch binary_batch; + binary_batch._value_kind = DecodedValueKind::FIXED_BINARY; + binary_batch._binary_chunks = {fixed_binary_array({"abcd", "wxyz"}, 4)}; + binary_batch._values_written = 2; + auto binary_column = type->create_column(); + ASSERT_TRUE(binary_reader.append_values(binary_batch, 2, nullptr, binary_column).ok()); + EXPECT_EQ(binary_view.value_kind, DecodedValueKind::FIXED_BINARY); + EXPECT_EQ(binary_view.fixed_length, 4); + ASSERT_EQ(binary_view.owned_binary_values.size(), 2); + EXPECT_EQ(binary_view.owned_binary_values[0], "abcd"); + EXPECT_EQ(binary_view.owned_binary_values[1], "wxyz"); + + ParquetTypeDescriptor float16_descriptor; + float16_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY; + float16_descriptor.extra_type_info = ParquetExtraTypeInfo::FLOAT16; + float16_descriptor.fixed_length = 2; + CapturedDecodedView float16_view; + auto float16_reader = make_spy_leaf_reader(float16_descriptor, + std::make_shared(), &float16_view); + auto half = [](uint16_t value) { + std::string bytes(sizeof(value), '\0'); + memcpy(bytes.data(), &value, sizeof(value)); + return bytes; + }; + ParquetLeafBatch float16_batch; + float16_batch._value_kind = DecodedValueKind::FIXED_BINARY; + float16_batch._binary_chunks = {fixed_binary_array({half(0x3E00), half(0x4000)}, 2)}; + float16_batch._values_written = 2; + auto float16_column = std::make_shared()->create_column(); + ASSERT_TRUE(float16_reader.append_values(float16_batch, 2, nullptr, float16_column).ok()); + EXPECT_EQ(float16_view.value_kind, DecodedValueKind::FLOAT); + ASSERT_EQ(float16_view.fixed_values.size(), sizeof(float) * 2); + const auto* floats = reinterpret_cast(float16_view.fixed_values.data()); + EXPECT_FLOAT_EQ(floats[0], 1.5F); + EXPECT_FLOAT_EQ(floats[1], 2.0F); +} + +TEST(ParquetLeafReaderTest, NestedBatchValueLayoutLevels) { + auto descriptor = int32_column_descriptor(2, 1); + auto reader = make_nested_leaf_reader(descriptor, std::make_shared()); + const std::vector def_levels = {2, 2, 2}; + const std::vector rep_levels = {0, 1, 0}; + const std::vector values = {10, 20, 30}; + const auto leaf_batch = + ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values); + + ParquetNestedScalarBatch nested_batch; + auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 2, 2, 1, + &nested_batch); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(nested_batch.records_read, 2); + EXPECT_EQ(nested_batch.levels_written, 3); + EXPECT_EQ(nested_batch.value_indices, std::vector({0, 1, 2})); + const auto& nested_values = assert_cast(*nested_batch.values_column); + ASSERT_EQ(nested_values.size(), 3); + EXPECT_EQ(nested_values.get_element(0), 10); + EXPECT_EQ(nested_values.get_element(2), 30); +} + +TEST(ParquetLeafReaderTest, NestedBatchValueLayoutValueSlots) { + auto descriptor = int32_column_descriptor(2, 1); + auto reader = make_nested_leaf_reader(descriptor, std::make_shared()); + const std::vector def_levels = {2, 1, 2, 0}; + const std::vector rep_levels = {0, 1, 0, 0}; + const std::vector values = {10, 777, 30}; + const auto leaf_batch = + ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values); + + ParquetNestedScalarBatch nested_batch; + auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 3, 1, 1, + &nested_batch); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(nested_batch.value_indices, std::vector({0, -1, 2, -1})); +} + +TEST(ParquetLeafReaderTest, NestedBatchValueLayoutLeafValues) { + auto descriptor = int32_column_descriptor(2, 1); + auto reader = make_nested_leaf_reader(descriptor, std::make_shared()); + const std::vector def_levels = {2, 1, 2, 0}; + const std::vector rep_levels = {0, 1, 0, 0}; + const std::vector values = {10, 30}; + const auto leaf_batch = + ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values); + + ParquetNestedScalarBatch nested_batch; + auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 3, 1, 1, + &nested_batch); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(nested_batch.value_indices, std::vector({0, -1, 1, -1})); +} + +TEST(ParquetLeafReaderTest, NestedBatchValueLayoutPayloadSlots) { + auto descriptor = int32_column_descriptor(2, 1); + auto reader = make_nested_leaf_reader(descriptor, std::make_shared()); + const std::vector def_levels = {1, 2, 0, 2}; + const std::vector rep_levels = {0, 0, 0, 0}; + const std::vector values = {777, 10, 30}; + const auto leaf_batch = + ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values); + + ParquetNestedScalarBatch nested_batch; + auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 4, 2, 1, + &nested_batch); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(nested_batch.value_indices, std::vector({-1, 1, -1, 2})); +} + +TEST(ParquetLeafReaderTest, NestedBatchRejectsMismatchedValueLayout) { + auto descriptor = int32_column_descriptor(2, 1); + auto reader = make_nested_leaf_reader(descriptor, std::make_shared()); + const std::vector def_levels = {2, 0, 2, 0}; + const std::vector rep_levels = {0, 0, 0, 0}; + const std::vector values = {10, 20, 30}; + const auto leaf_batch = + ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values); + + ParquetNestedScalarBatch nested_batch; + const auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 4, 2, 1, + &nested_batch); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("inconsistent value count"), std::string::npos); +} + +TEST(ParquetLeafReaderTest, NestedBatchRejectsDenseNullable) { + auto descriptor = int32_column_descriptor(1, 0); + auto reader = + make_nested_leaf_reader(descriptor, make_nullable(std::make_shared())); + const std::vector def_levels = {1}; + const std::vector rep_levels = {0}; + const std::vector values = {10}; + const auto leaf_batch = + ParquetLeafReaderTestAccess::make_fixed_batch(def_levels, rep_levels, values, true); + + ParquetNestedScalarBatch nested_batch; + const auto status = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_batch, 1, 0, 0, + &nested_batch); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Dense nullable parquet nested reader is not supported"), + std::string::npos); +} + +} // namespace doris::format::parquet diff --git a/be/test/format_v2/parquet/parquet_page_cache_range_test.cpp b/be/test/format_v2/parquet/parquet_page_cache_range_test.cpp new file mode 100644 index 00000000000000..f8e12206bb1220 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_page_cache_range_test.cpp @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "format_v2/parquet/parquet_file_context.h" + +namespace doris::format::parquet { +namespace { + +void expect_plan_entry(const ParquetPageCacheReadPlanEntry& entry, + const ParquetPageCacheRange& cached_range, int64_t copy_offset_in_cache, + int64_t output_offset, int64_t copy_size) { + EXPECT_EQ(entry.cached_range.offset, cached_range.offset); + EXPECT_EQ(entry.cached_range.size, cached_range.size); + EXPECT_EQ(entry.copy_offset_in_cache, copy_offset_in_cache); + EXPECT_EQ(entry.output_offset, output_offset); + EXPECT_EQ(entry.copy_size, copy_size); +} + +TEST(ParquetPageCacheRangeTest, SubsetRequestHitsSingleCachedRange) { + const std::vector cached_ranges = { + {100, 100}, + }; + + // Request [120, 150) is fully inside cached [100, 200). The reader should lookup + // the exact cached key [100, 200), then copy from cached offset 20 into output offset 0. + auto plan = detail::plan_page_cache_range_read(120, 30, cached_ranges); + + ASSERT_EQ(plan.size(), 1); + expect_plan_entry(plan[0], {100, 100}, 20, 0, 30); +} + +TEST(ParquetPageCacheRangeTest, SupersetRequestHitsMultipleAdjacentCachedRanges) { + const std::vector cached_ranges = { + {180, 80}, + {100, 80}, + }; + + // Request [100, 260) is larger than either cached entry, but the two cached ranges + // exactly cover it. The copy plan stitches the two exact cache entries together. + auto plan = detail::plan_page_cache_range_read(100, 160, cached_ranges); + + ASSERT_EQ(plan.size(), 2); + expect_plan_entry(plan[0], {100, 80}, 0, 0, 80); + expect_plan_entry(plan[1], {180, 80}, 0, 80, 80); +} + +TEST(ParquetPageCacheRangeTest, SupersetRequestCanUseOverlappingCachedRanges) { + const std::vector cached_ranges = { + {150, 110}, + {100, 100}, + }; + + // Request [100, 260) is covered by overlapping cached ranges. The first copy uses + // [100, 200); the second resumes at cursor 200 and copies the tail from [150, 260). + auto plan = detail::plan_page_cache_range_read(100, 160, cached_ranges); + + ASSERT_EQ(plan.size(), 2); + expect_plan_entry(plan[0], {100, 100}, 0, 0, 100); + expect_plan_entry(plan[1], {150, 110}, 50, 100, 60); +} + +TEST(ParquetPageCacheRangeTest, PartialOverlapWithoutFullCoverageMisses) { + const std::vector cached_ranges = { + {100, 80}, + {200, 60}, + }; + + // Cached ranges cover [100, 180) and [200, 260), but [180, 200) is missing. + // The caller must read the whole request from the file instead of returning + // a partially cached result. + auto plan = detail::plan_page_cache_range_read(100, 160, cached_ranges); + + EXPECT_TRUE(plan.empty()); +} + +TEST(ParquetPageCacheRangeTest, NonCoveringAndInvalidRangesAreIgnored) { + const std::vector cached_ranges = { + {50, 20}, {100, 0}, {100, -1}, {180, 20}, {120, 30}, + }; + + // Only [120, 150) intersects the request, but it does not cover the request start + // [100, 120), so this is still a miss. + auto plan = detail::plan_page_cache_range_read(100, 50, cached_ranges); + + EXPECT_TRUE(plan.empty()); +} + +TEST(ParquetPageCacheRangeTest, InvalidRequestMisses) { + const std::vector cached_ranges = { + {100, 100}, + }; + + EXPECT_TRUE(detail::plan_page_cache_range_read(-1, 10, cached_ranges).empty()); + EXPECT_TRUE(detail::plan_page_cache_range_read(100, 0, cached_ranges).empty()); + EXPECT_TRUE(detail::plan_page_cache_range_read(100, -1, cached_ranges).empty()); +} + +} // namespace +} // namespace doris::format::parquet diff --git a/be/test/format_v2/parquet/parquet_reader_control_test.cpp b/be/test/format_v2/parquet/parquet_reader_control_test.cpp new file mode 100644 index 00000000000000..c7d430350d1b26 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_reader_control_test.cpp @@ -0,0 +1,1034 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/column/column_array.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "format_v2/column_data.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/parquet_statistics.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/parquet/reader/global_rowid_column_reader.h" +#include "format_v2/parquet/reader/list_column_reader.h" +#include "format_v2/parquet/reader/map_column_reader.h" +#include "format_v2/parquet/reader/nested_column_materializer.h" +#include "format_v2/parquet/reader/row_position_column_reader.h" +#include "format_v2/parquet/reader/scalar_column_reader.h" +#include "format_v2/parquet/reader/struct_column_reader.h" +#include "format_v2/parquet/selection_vector.h" +#include "storage/utils.h" + +namespace doris::format::parquet { +namespace { + +ParquetColumnSchema int64_schema(std::string name = "mock") { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = std::move(name); + schema.type = std::make_shared(); + return schema; +} + +ParquetColumnSchema nested_int64_schema(std::string name, int16_t nullable_definition_level, + int16_t definition_level, int16_t repetition_level = 0, + int16_t repeated_ancestor_definition_level = 0) { + ParquetColumnSchema schema = int64_schema(std::move(name)); + schema.type = make_nullable(std::make_shared()); + schema.nullable_definition_level = nullable_definition_level; + schema.definition_level = definition_level; + schema.repetition_level = repetition_level; + schema.repeated_repetition_level = repetition_level; + schema.repeated_ancestor_definition_level = repeated_ancestor_definition_level; + return schema; +} + +ParquetColumnSchema nested_struct_schema() { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "struct"; + schema.kind = ParquetColumnSchemaKind::STRUCT; + schema.nullable_definition_level = 1; + schema.definition_level = 2; + schema.type = make_nullable(std::make_shared( + DataTypes {make_nullable(std::make_shared()), + make_nullable(std::make_shared())}, + Strings {"a", "b"})); + return schema; +} + +ParquetColumnSchema nested_list_schema(std::string name, DataTypePtr element_type, + int16_t nullable_definition_level, int16_t definition_level, + int16_t repetition_level, + int16_t repeated_ancestor_definition_level) { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = std::move(name); + schema.kind = ParquetColumnSchemaKind::LIST; + schema.nullable_definition_level = nullable_definition_level; + schema.definition_level = definition_level; + schema.repetition_level = repetition_level; + schema.repeated_repetition_level = repetition_level; + schema.repeated_ancestor_definition_level = repeated_ancestor_definition_level; + schema.type = make_nullable(std::make_shared(std::move(element_type))); + return schema; +} + +ParquetColumnSchema nested_map_schema( + DataTypePtr value_type = make_nullable(std::make_shared())) { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "map"; + schema.kind = ParquetColumnSchemaKind::MAP; + schema.nullable_definition_level = 1; + schema.definition_level = 2; + schema.repetition_level = 1; + schema.repeated_ancestor_definition_level = 2; + schema.type = make_nullable(std::make_shared( + make_nullable(std::make_shared()), std::move(value_type))); + return schema; +} + +ParquetColumnSchema bare_repeated_int64_list_schema() { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "repeated"; + schema.kind = ParquetColumnSchemaKind::LIST; + schema.definition_level = 1; + schema.repetition_level = 1; + schema.repeated_repetition_level = 1; + schema.repeated_ancestor_definition_level = 1; + schema.type = std::make_shared(std::make_shared()); + return schema; +} + +std::unique_ptr primitive_child(int local_id, std::string name, + DataTypePtr type) { + auto child = std::make_unique(); + child->local_id = local_id; + child->name = std::move(name); + child->kind = ParquetColumnSchemaKind::PRIMITIVE; + child->leaf_column_id = local_id; + child->type = std::move(type); + child->type_descriptor.physical_type = ::parquet::Type::INT32; + child->type_descriptor.doris_type = child->type; + return child; +} + +ParquetColumnSchema struct_schema_for_projection() { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "s"; + schema.kind = ParquetColumnSchemaKind::STRUCT; + schema.children.push_back(primitive_child(0, "a", std::make_shared())); + schema.children.push_back(primitive_child(1, "b", std::make_shared())); + DataTypes types = {make_nullable(schema.children[0]->type), + make_nullable(schema.children[1]->type)}; + Strings names = {"a", "b"}; + schema.type = std::make_shared(types, names); + return schema; +} + +ParquetColumnSchema list_schema_for_projection() { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "xs"; + schema.kind = ParquetColumnSchemaKind::LIST; + schema.children.push_back(primitive_child(0, "element", std::make_shared())); + schema.type = std::make_shared(schema.children[0]->type); + return schema; +} + +ParquetColumnSchema map_schema_for_projection() { + ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "m"; + schema.kind = ParquetColumnSchemaKind::MAP; + schema.children.push_back(primitive_child(0, "key", std::make_shared())); + schema.children.push_back(primitive_child(1, "value", std::make_shared())); + schema.type = std::make_shared(make_nullable(schema.children[0]->type), + make_nullable(schema.children[1]->type)); + return schema; +} + +class CursorColumnReader final : public ParquetColumnReader { +public: + CursorColumnReader() : ParquetColumnReader(int64_schema(), std::make_shared()) {} + + Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override { + if (column.get() == nullptr || rows_read == nullptr) { + return Status::InvalidArgument("invalid mock read arguments"); + } + auto* values = assert_cast(column.get()); + for (int64_t row = 0; row < rows; ++row) { + values->insert_value(_cursor + row); + } + _read_lengths.push_back(rows); + _cursor += rows; + *rows_read = rows; + return Status::OK(); + } + + Status skip(int64_t rows) override { + _skip_lengths.push_back(rows); + _cursor += rows; + return Status::OK(); + } + + int64_t cursor() const { return _cursor; } + const std::vector& skip_lengths() const { return _skip_lengths; } + const std::vector& read_lengths() const { return _read_lengths; } + +private: + int64_t _cursor = 0; + std::vector _skip_lengths; + std::vector _read_lengths; +}; + +class NestedBuildReader final : public ParquetColumnReader { +public: + explicit NestedBuildReader(int64_t values_to_build) + : ParquetColumnReader(int64_schema("nested"), std::make_shared()), + _values_to_build(values_to_build) {} + + Status read(int64_t, MutableColumnPtr&, int64_t*) override { + return Status::NotSupported("unused"); + } + + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override { + if (column.get() == nullptr || values_read == nullptr) { + return Status::InvalidArgument("invalid mock nested build arguments"); + } + _last_length_upper_bound = length_upper_bound; + auto* values = assert_cast(column.get()); + for (int64_t value = 0; value < _values_to_build; ++value) { + values->insert_value(value); + } + *values_read = _values_to_build; + return Status::OK(); + } + + int64_t last_length_upper_bound() const { return _last_length_upper_bound; } + +private: + int64_t _values_to_build = 0; + int64_t _last_length_upper_bound = 0; +}; + +class ScriptedNestedReader final : public ParquetColumnReader { +public: + ScriptedNestedReader(ParquetColumnSchema schema, DataTypePtr type, + std::vector def_levels, std::vector rep_levels, + bool has_repeated_child = false, bool build_nulls = false) + : ParquetColumnReader(schema, std::move(type)), + _def_levels(std::move(def_levels)), + _rep_levels(std::move(rep_levels)), + _has_repeated_child(has_repeated_child), + _build_nulls(build_nulls) {} + + Status read(int64_t, MutableColumnPtr&, int64_t*) override { + return Status::NotSupported("unused"); + } + + Status load_nested_batch(int64_t rows) override { + _load_lengths.push_back(rows); + return Status::OK(); + } + + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override { + _build_lengths.push_back(length_upper_bound); + if (column.get() == nullptr || values_read == nullptr) { + return Status::InvalidArgument("invalid scripted nested build arguments"); + } + for (int64_t row = 0; row < length_upper_bound; ++row) { + insert_value(column, _next_value++, _build_nulls); + } + *values_read = length_upper_bound; + return Status::OK(); + } + + const std::vector& nested_definition_levels() const override { return _def_levels; } + const std::vector& nested_repetition_levels() const override { return _rep_levels; } + int64_t nested_levels_written() const override { + return static_cast(_def_levels.size()); + } + bool is_or_has_repeated_child() const override { return _has_repeated_child; } + + const std::vector& build_lengths() const { return _build_lengths; } + +private: + static void insert_value(MutableColumnPtr& column, int64_t value, bool is_null) { + if (auto* nullable_column = check_and_get_column(*column); + nullable_column != nullptr) { + if (is_null) { + nullable_column->insert_default(); + return; + } + assert_cast(nullable_column->get_nested_column()).insert_value(value); + nullable_column->get_null_map_data().push_back(0); + return; + } + assert_cast(*column).insert_value(value); + } + + std::vector _def_levels; + std::vector _rep_levels; + bool _has_repeated_child = false; + bool _build_nulls = false; + int64_t _next_value = 0; + std::vector _load_lengths; + std::vector _build_lengths; +}; + +} // namespace + +struct ScalarColumnReaderTestAccess { + static void set_nested_batch(ScalarColumnReader* reader, + std::unique_ptr batch) { + reader->_nested_batch = std::move(batch); + } + + static int64_t page_filtered_rows_to_skip(const ScalarColumnReader& reader, int64_t rows) { + return reader.page_filtered_rows_to_skip(rows); + } + + static void set_row_group_rows_read(ScalarColumnReader* reader, int64_t rows) { + reader->_row_group_rows_read = rows; + } +}; + +namespace { + +std::unique_ptr make_scripted_scalar_reader( + ParquetColumnSchema schema, std::unique_ptr batch) { + auto reader = std::make_unique(schema, nullptr); + ScalarColumnReaderTestAccess::set_nested_batch(reader.get(), std::move(batch)); + return reader; +} + +std::unique_ptr scalar_batch(std::vector def_levels, + std::vector rep_levels, + std::vector value_indices, + std::vector values) { + auto batch = std::make_unique(); + batch->levels_written = static_cast(def_levels.size()); + batch->def_levels = std::move(def_levels); + batch->rep_levels = std::move(rep_levels); + batch->value_indices = std::move(value_indices); + auto column = ColumnInt64::create(); + for (const auto value : values) { + column->insert_value(value); + } + batch->values_column = std::move(column); + return batch; +} + +class DefaultOnlyReader final : public ParquetColumnReader { +public: + DefaultOnlyReader() + : ParquetColumnReader(int64_schema("default_only"), std::make_shared()) { + } + + Status read(int64_t, MutableColumnPtr&, int64_t*) override { + return Status::NotSupported("unused"); + } +}; + +GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) { + const auto ref = column.get_data_at(row); + EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2)); + GlobalRowLoacationV2 location(0, 0, 0, 0); + std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2)); + return location; +} + +} // namespace + +TEST(SelectionVectorTest, IdentitySelectionToRanges) { + SelectionVector selection; + const auto ranges = selection_to_ranges(selection, 5); + ASSERT_EQ(ranges.size(), 1); + EXPECT_EQ(ranges[0].start, 0); + EXPECT_EQ(ranges[0].length, 5); + EXPECT_TRUE(selection.verify(5, 5).ok()); +} + +TEST(SelectionVectorTest, ExternalBufferSelectionToRanges) { + SelectionVector::Index indices[] = {0, 1, 4, 6, 7}; + SelectionVector selection(indices, std::size(indices)); + const auto ranges = selection_to_ranges(selection, std::size(indices)); + ASSERT_EQ(ranges.size(), 3); + EXPECT_EQ(ranges[0].start, 0); + EXPECT_EQ(ranges[0].length, 2); + EXPECT_EQ(ranges[1].start, 4); + EXPECT_EQ(ranges[1].length, 1); + EXPECT_EQ(ranges[2].start, 6); + EXPECT_EQ(ranges[2].length, 2); + EXPECT_TRUE(selection.verify(std::size(indices), 8).ok()); +} + +TEST(SelectionVectorTest, VerifyRejectsInvalidSelection) { + SelectionVector selection(2); + EXPECT_FALSE(selection.verify(3, 3).ok()); + EXPECT_FALSE(selection.verify(1, -1).ok()); + + selection.set_index(0, 2); + selection.set_index(1, 1); + EXPECT_FALSE(selection.verify(2, 3).ok()); + + selection.set_index(0, 0); + selection.set_index(1, 3); + EXPECT_FALSE(selection.verify(2, 3).ok()); +} + +TEST(ParquetColumnReaderControlTest, BaseSelectUsesSkipReadRanges) { + CursorColumnReader reader; + SelectionVector selection(3); + selection.set_index(0, 0); + selection.set_index(1, 2); + selection.set_index(2, 4); + + auto column = std::make_shared()->create_column(); + ASSERT_TRUE(reader.select(selection, 3, 6, column).ok()); + + const auto& values = assert_cast(*column); + ASSERT_EQ(values.size(), 3); + EXPECT_EQ(values.get_element(0), 0); + EXPECT_EQ(values.get_element(1), 2); + EXPECT_EQ(values.get_element(2), 4); + EXPECT_EQ(reader.cursor(), 6); + EXPECT_EQ(reader.read_lengths(), std::vector({1, 1, 1})); + EXPECT_EQ(reader.skip_lengths(), std::vector({0, 1, 1, 1})); +} + +TEST(ParquetColumnReaderControlTest, BaseSelectZeroRowsConsumesBatch) { + CursorColumnReader reader; + SelectionVector selection; + auto column = std::make_shared()->create_column(); + ASSERT_TRUE(reader.select(selection, 0, 4, column).ok()); + EXPECT_EQ(column->size(), 0); + EXPECT_EQ(reader.cursor(), 4); + EXPECT_TRUE(reader.read_lengths().empty()); + EXPECT_EQ(reader.skip_lengths(), std::vector({4})); +} + +TEST(ParquetColumnReaderControlTest, BaseNestedDefaultsAndSkipNested) { + DefaultOnlyReader base_reader; + EXPECT_FALSE(base_reader.skip(1).ok()); + EXPECT_FALSE(base_reader.load_nested_batch(1).ok()); + + auto column = std::make_shared()->create_column(); + int64_t values_read = 0; + EXPECT_FALSE(base_reader.build_nested_column(1, column, &values_read).ok()); + + NestedBuildReader ok_reader(3); + ASSERT_TRUE(ok_reader.skip_nested_column(3).ok()); + EXPECT_EQ(ok_reader.last_length_upper_bound(), 3); + + NestedBuildReader short_reader(2); + EXPECT_FALSE(short_reader.skip_nested_column(3).ok()); +} + +TEST(ParquetColumnReaderControlTest, NestedMaterializerHelpersAppendOffsetsAndParentNulls) { + ColumnArray::Offsets64 offsets; + append_offsets(offsets, {3, 0, 2}); + ASSERT_EQ(offsets.size(), 3); + EXPECT_EQ(offsets[0], 3); + EXPECT_EQ(offsets[1], 3); + EXPECT_EQ(offsets[2], 5); + append_offsets(offsets, {1, 4}); + ASSERT_EQ(offsets.size(), 5); + EXPECT_EQ(offsets[3], 6); + EXPECT_EQ(offsets[4], 10); + + const NullMap parent_nulls = {0, 1, 0}; + append_parent_nulls(nullptr, parent_nulls); + NullMap dst = {1}; + append_parent_nulls(&dst, parent_nulls); + EXPECT_EQ(dst, NullMap({1, 0, 1, 0})); +} + +TEST(ParquetColumnReaderControlTest, PageFilteredRowsToSkipUsesOnlyFullSkippedRanges) { + ParquetPageSkipPlan page_skip_plan; + page_skip_plan.skipped_ranges = {RowRange {0, 3}, RowRange {5, 2}, RowRange {10, 4}}; + + auto schema = nested_int64_schema("page_filtered", 0, 0); + ScalarColumnReader reader(schema, nullptr, &page_skip_plan); + EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 3), 3); + EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 5), 3); + + ScalarColumnReaderTestAccess::set_row_group_rows_read(&reader, 5); + EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 2), 2); + EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 5), 2); +} + +TEST(ParquetColumnReaderControlTest, StructSkipsNullParentForRepeatedChildAndBatchesPresentRows) { + auto repeated_child = std::make_unique( + nested_int64_schema("repeated_shape", 1, 2, 1), + make_nullable(std::make_shared()), std::vector {2, 2, 2, 2}, + std::vector {0, 0, 0, 0}, true); + auto* repeated_child_ptr = repeated_child.get(); + auto scalar_child = make_scripted_scalar_reader( + nested_int64_schema("scalar_child", 1, 2), + scalar_batch({2, 0, 2, 2}, {0, 0, 0, 0}, {0, -1, 1, 2}, {10, 20, 30})); + auto* scalar_child_ptr = scalar_child.get(); + + std::vector> children; + children.push_back(std::move(repeated_child)); + children.push_back(std::move(scalar_child)); + StructColumnReader reader(nested_struct_schema(), + make_nullable(std::make_shared( + DataTypes {make_nullable(std::make_shared()), + make_nullable(std::make_shared())}, + Strings {"a", "b"})), + std::move(children), {0, 1}); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(4, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 4); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 4); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_FALSE(nullable_column.is_null_at(3)); + EXPECT_EQ(repeated_child_ptr->build_lengths(), std::vector({1, 2})); + EXPECT_EQ(scalar_child_ptr->nested_build_level_cursor(), 4); +} + +TEST(ParquetColumnReaderControlTest, StructFallsBackToFirstChildWhenAllChildrenAreRepeated) { + auto first_child = std::make_unique( + nested_int64_schema("first", 1, 2, 1), make_nullable(std::make_shared()), + std::vector {2, 0}, std::vector {0, 0}, true); + auto second_child = std::make_unique( + nested_int64_schema("second", 1, 2, 1), + make_nullable(std::make_shared()), std::vector {2, 2}, + std::vector {0, 0}, true); + + std::vector> children; + children.push_back(std::move(first_child)); + children.push_back(std::move(second_child)); + StructColumnReader reader(nested_struct_schema(), nested_struct_schema().type, + std::move(children), {0, 1}); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(2, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(rows_read, 2); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); +} + +TEST(ParquetColumnReaderControlTest, StructNullParentAdvancesComplexChildShapeOnly) { + auto shape_child = std::make_unique( + nested_int64_schema("shape", 1, 2), make_nullable(std::make_shared()), + std::vector {2, 2, 0, 0, 2, 2}, std::vector {0, 0, 0, 0, 0, 0}); + + ParquetColumnSchema map_schema = nested_map_schema(); + map_schema.nullable_definition_level = 2; + map_schema.definition_level = 3; + map_schema.repeated_ancestor_definition_level = 0; + auto key_reader = std::make_unique( + nested_int64_schema("key", 3, 3, 1, 0), + make_nullable(std::make_shared()), + std::vector {3, 3, 0, 0, 3, 3}, std::vector {0, 0, 0, 0, 0, 0}); + auto value_reader = + make_scripted_scalar_reader(nested_int64_schema("value", 4, 4, 1, 0), + scalar_batch({4, 4, 0, 0, 4, 4}, {0, 0, 0, 0, 0, 0}, + {0, 1, -1, -1, 2, 3}, {10, 20, 30, 40})); + auto map_reader = std::make_unique( + map_schema, map_schema.type, std::move(key_reader), std::move(value_reader)); + + std::vector> children; + children.push_back(std::move(shape_child)); + children.push_back(std::move(map_reader)); + auto struct_type = make_nullable(std::make_shared(DataTypes {map_schema.type}, + Strings {"partitionValues"})); + StructColumnReader reader(nested_struct_schema(), struct_type, std::move(children), {-1, 0}); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(6, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 6); + + const auto& nullable_struct = assert_cast(*column); + ASSERT_EQ(nullable_struct.size(), 6); + EXPECT_FALSE(nullable_struct.is_null_at(0)); + EXPECT_FALSE(nullable_struct.is_null_at(1)); + EXPECT_TRUE(nullable_struct.is_null_at(2)); + EXPECT_TRUE(nullable_struct.is_null_at(3)); + EXPECT_FALSE(nullable_struct.is_null_at(4)); + EXPECT_FALSE(nullable_struct.is_null_at(5)); + + const auto& struct_column = + assert_cast(nullable_struct.get_nested_column()); + const auto& map_nullable = assert_cast(struct_column.get_column(0)); + ASSERT_EQ(map_nullable.size(), 6); + EXPECT_FALSE(map_nullable.is_null_at(0)); + EXPECT_FALSE(map_nullable.is_null_at(1)); + EXPECT_TRUE(map_nullable.is_null_at(2)); + EXPECT_TRUE(map_nullable.is_null_at(3)); + EXPECT_FALSE(map_nullable.is_null_at(4)); + EXPECT_FALSE(map_nullable.is_null_at(5)); + const auto& map_column = assert_cast(map_nullable.get_nested_column()); + ASSERT_EQ(map_column.get_offsets().size(), 6); + EXPECT_EQ(map_column.get_offsets()[0], 1); + EXPECT_EQ(map_column.get_offsets()[1], 2); + EXPECT_EQ(map_column.get_offsets()[2], 2); + EXPECT_EQ(map_column.get_offsets()[3], 2); + EXPECT_EQ(map_column.get_offsets()[4], 3); + EXPECT_EQ(map_column.get_offsets()[5], 4); +} + +TEST(ParquetColumnReaderControlTest, StructNullParentAdvancesNestedStructDescendants) { + auto shape_child = std::make_unique( + nested_int64_schema("shape", 1, 2), make_nullable(std::make_shared()), + std::vector {2, 0, 2}, std::vector {0, 0, 0}); + + auto id_batch = scalar_batch({4, 3, 4}, {0, 0, 0}, {0, -1, 1}, {10, 20}); + id_batch->value_slot_definition_level = 3; + auto id_reader = + make_scripted_scalar_reader(nested_int64_schema("id", 3, 4), std::move(id_batch)); + + ParquetColumnSchema inner_schema; + inner_schema.local_id = 0; + inner_schema.name = "stats_parsed"; + inner_schema.kind = ParquetColumnSchemaKind::STRUCT; + inner_schema.nullable_definition_level = 2; + inner_schema.definition_level = 3; + inner_schema.type = make_nullable(std::make_shared( + DataTypes {make_nullable(std::make_shared())}, Strings {"id"})); + + std::vector> inner_children; + inner_children.push_back(std::move(id_reader)); + auto inner_reader = std::make_unique( + inner_schema, inner_schema.type, std::move(inner_children), std::vector {0}); + + std::vector> outer_children; + outer_children.push_back(std::move(shape_child)); + outer_children.push_back(std::move(inner_reader)); + auto outer_type = make_nullable(std::make_shared(DataTypes {inner_schema.type}, + Strings {"stats_parsed"})); + StructColumnReader reader(nested_struct_schema(), outer_type, std::move(outer_children), + {-1, 0}); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(3, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 3); + + const auto& outer_nullable = assert_cast(*column); + ASSERT_EQ(outer_nullable.size(), 3); + EXPECT_FALSE(outer_nullable.is_null_at(0)); + EXPECT_TRUE(outer_nullable.is_null_at(1)); + EXPECT_FALSE(outer_nullable.is_null_at(2)); + + const auto& outer_struct = assert_cast(outer_nullable.get_nested_column()); + const auto& inner_nullable = assert_cast(outer_struct.get_column(0)); + ASSERT_EQ(inner_nullable.size(), 3); + EXPECT_FALSE(inner_nullable.is_null_at(0)); + EXPECT_TRUE(inner_nullable.is_null_at(1)); + EXPECT_FALSE(inner_nullable.is_null_at(2)); + + const auto& inner_struct = assert_cast(inner_nullable.get_nested_column()); + const auto& id_nullable = assert_cast(inner_struct.get_column(0)); + const auto& id_values = assert_cast(id_nullable.get_nested_column()); + EXPECT_EQ(id_values.get_element(0), 10); + EXPECT_EQ(id_values.get_element(2), 20); +} + +TEST(ParquetColumnReaderControlTest, ListKeepsEmptyBareRepeatedPrimitiveRows) { + auto element_reader = std::make_unique( + nested_int64_schema("element", 0, 1, 1, 1), std::make_shared(), + std::vector {0, 1, 1, 0}, std::vector {0, 0, 1, 0}); + auto* element_reader_ptr = element_reader.get(); + ListColumnReader reader(bare_repeated_int64_list_schema(), + bare_repeated_int64_list_schema().type, std::move(element_reader)); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(3, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 3); + + const auto& array_column = assert_cast(*column); + ASSERT_EQ(array_column.get_offsets().size(), 3); + EXPECT_EQ(array_column.get_offsets()[0], 0); + EXPECT_EQ(array_column.get_offsets()[1], 2); + EXPECT_EQ(array_column.get_offsets()[2], 2); + EXPECT_EQ(element_reader_ptr->build_lengths(), std::vector({2})); +} + +TEST(ParquetColumnReaderControlTest, NestedListSkipsAncestorEmptyRowsButKeepsNullElements) { + auto element_reader = + std::make_unique(nested_int64_schema("element", 5, 5, 2, 4), + make_nullable(std::make_shared()), + std::vector {1, 5, 5, 5, 2, 5, 2, 0}, + std::vector {0, 0, 2, 1, 0, 1, 1, 0}); + auto* element_reader_ptr = element_reader.get(); + + const auto inner_type = make_nullable( + std::make_shared(make_nullable(std::make_shared()))); + auto inner_reader = std::make_unique( + nested_list_schema("inner", make_nullable(std::make_shared()), 3, 4, 2, + 2), + inner_type, std::move(element_reader)); + auto outer_type = make_nullable(std::make_shared(inner_type)); + ListColumnReader reader(nested_list_schema("outer", inner_type, 1, 2, 1, 2), outer_type, + std::move(inner_reader)); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(4, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 4); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 4); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_TRUE(nullable_column.is_null_at(3)); + + const auto& outer_array = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 4); + EXPECT_EQ(outer_offsets[0], 0); + EXPECT_EQ(outer_offsets[1], 2); + EXPECT_EQ(outer_offsets[2], 5); + EXPECT_EQ(outer_offsets[3], 5); + + const auto& inner_nullable = assert_cast(outer_array.get_data()); + ASSERT_EQ(inner_nullable.size(), 5); + EXPECT_FALSE(inner_nullable.is_null_at(0)); + EXPECT_FALSE(inner_nullable.is_null_at(1)); + EXPECT_TRUE(inner_nullable.is_null_at(2)); + EXPECT_FALSE(inner_nullable.is_null_at(3)); + EXPECT_TRUE(inner_nullable.is_null_at(4)); + + const auto& inner_array = assert_cast(inner_nullable.get_nested_column()); + const auto& inner_offsets = inner_array.get_offsets(); + ASSERT_EQ(inner_offsets.size(), 5); + EXPECT_EQ(inner_offsets[0], 2); + EXPECT_EQ(inner_offsets[1], 3); + EXPECT_EQ(inner_offsets[2], 3); + EXPECT_EQ(inner_offsets[3], 4); + EXPECT_EQ(inner_offsets[4], 4); + EXPECT_EQ(element_reader_ptr->build_lengths(), std::vector({4})); +} + +TEST(ParquetColumnReaderControlTest, MapKeepsEmptyMapRows) { + auto key_reader = std::make_unique( + nested_int64_schema("key", 1, 2, 1, 2), + make_nullable(std::make_shared()), std::vector {1}, + std::vector {0}); + auto value_reader = std::make_unique( + nested_int64_schema("value", 2, 3, 1, 2), + make_nullable(std::make_shared()), std::vector {1}, + std::vector {0}); + auto* value_reader_ptr = value_reader.get(); + MapColumnReader reader(nested_map_schema(), nested_map_schema().type, std::move(key_reader), + std::move(value_reader)); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(1, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 1); + + const auto& nullable_map = assert_cast(*column); + EXPECT_FALSE(nullable_map.is_null_at(0)); + const auto& map_column = assert_cast(nullable_map.get_nested_column()); + ASSERT_EQ(map_column.get_offsets().size(), 1); + EXPECT_EQ(map_column.get_offsets()[0], 0); + EXPECT_EQ(value_reader_ptr->build_lengths(), std::vector({0})); +} + +TEST(ParquetColumnReaderControlTest, ListMapSkipsAncestorEmptyRowsBeforeScalarValues) { + auto key_reader = std::make_unique( + nested_int64_schema("key", 4, 4, 2, 4), + make_nullable(std::make_shared()), std::vector {1, 4}, + std::vector {0, 0}); + auto value_reader = make_scripted_scalar_reader(nested_int64_schema("value", 5, 5, 2, 4), + scalar_batch({1, 5}, {0, 0}, {-1, 0}, {100})); + + const auto map_type = make_nullable( + std::make_shared(make_nullable(std::make_shared()), + make_nullable(std::make_shared()))); + auto map_reader = std::make_unique( + nested_map_schema(make_nullable(std::make_shared())), map_type, + std::move(key_reader), std::move(value_reader)); + auto outer_type = make_nullable(std::make_shared(map_type)); + ListColumnReader reader(nested_list_schema("outer", map_type, 1, 2, 1, 2), outer_type, + std::move(map_reader)); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + auto status = reader.build_nested_column(2, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + ASSERT_EQ(rows_read, 2); + + const auto& nullable_column = assert_cast(*column); + ASSERT_EQ(nullable_column.size(), 2); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_FALSE(nullable_column.is_null_at(1)); + + const auto& outer_array = assert_cast(nullable_column.get_nested_column()); + const auto& outer_offsets = outer_array.get_offsets(); + ASSERT_EQ(outer_offsets.size(), 2); + EXPECT_EQ(outer_offsets[0], 0); + EXPECT_EQ(outer_offsets[1], 1); + + const auto& map_nullable = assert_cast(outer_array.get_data()); + ASSERT_EQ(map_nullable.size(), 1); + EXPECT_FALSE(map_nullable.is_null_at(0)); + const auto& map_column = assert_cast(map_nullable.get_nested_column()); + ASSERT_EQ(map_column.get_offsets().size(), 1); + EXPECT_EQ(map_column.get_offsets()[0], 1); + + const auto& values = assert_cast(map_column.get_values()); + const auto& value_data = assert_cast(values.get_nested_column()); + ASSERT_EQ(values.size(), 1); + EXPECT_FALSE(values.is_null_at(0)); + EXPECT_EQ(value_data.get_element(0), 100); +} + +TEST(ParquetColumnReaderControlTest, MapRejectsNullKeysAndMisalignedScalarValueRepLevels) { + auto key_reader = std::make_unique( + nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared()), + std::vector {2}, std::vector {0}, false, true); + auto value_reader = std::make_unique( + nested_int64_schema("value", 1, 2, 1), make_nullable(std::make_shared()), + std::vector {2}, std::vector {0}); + MapColumnReader null_key_reader(nested_map_schema(), nested_map_schema().type, + std::move(key_reader), std::move(value_reader)); + auto column = null_key_reader.type()->create_column(); + int64_t rows_read = 0; + auto status = null_key_reader.build_nested_column(1, column, &rows_read); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains null key"), std::string::npos); + + auto aligned_key_reader = std::make_unique( + nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared()), + std::vector {2, 2}, std::vector {0, 1}); + auto misaligned_value_reader = + make_scripted_scalar_reader(nested_int64_schema("value", 2, 3, 1), + scalar_batch({3, 3}, {0, 0}, {0, 1}, {100, 200})); + MapColumnReader misaligned_reader(nested_map_schema(), nested_map_schema().type, + std::move(aligned_key_reader), + std::move(misaligned_value_reader)); + column = misaligned_reader.type()->create_column(); + status = misaligned_reader.build_nested_column(1, column, &rows_read); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("value repetition level is not aligned"), std::string::npos); +} + +TEST(ParquetColumnReaderControlTest, MapBuildsScalarAndComplexValuePaths) { + auto key_reader = std::make_unique( + nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared()), + std::vector {2, 2}, std::vector {0, 1}); + auto scalar_value_reader = + make_scripted_scalar_reader(nested_int64_schema("value", 2, 3, 1), + scalar_batch({3, 3}, {0, 1}, {0, 1}, {100, 200})); + MapColumnReader scalar_reader(nested_map_schema(), nested_map_schema().type, + std::move(key_reader), std::move(scalar_value_reader)); + auto column = scalar_reader.type()->create_column(); + int64_t rows_read = 0; + auto status = scalar_reader.build_nested_column(1, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + const auto& nullable_map = assert_cast(*column); + const auto& map_column = assert_cast(nullable_map.get_nested_column()); + ASSERT_EQ(map_column.get_offsets().size(), 1); + EXPECT_EQ(map_column.get_offsets()[0], 2); + const auto& values = assert_cast(map_column.get_values()); + const auto& value_data = assert_cast(values.get_nested_column()); + ASSERT_EQ(values.size(), 2); + EXPECT_EQ(value_data.get_element(0), 100); + EXPECT_EQ(value_data.get_element(1), 200); + + auto complex_key_reader = std::make_unique( + nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared()), + std::vector {2, 2}, std::vector {0, 1}); + auto complex_value_reader = std::make_unique( + nested_int64_schema("complex_value", 2, 3, 1), + make_nullable(std::make_shared()), std::vector {3, 3}, + std::vector {0, 1}); + auto* complex_value_reader_ptr = complex_value_reader.get(); + MapColumnReader complex_reader(nested_map_schema(), nested_map_schema().type, + std::move(complex_key_reader), std::move(complex_value_reader)); + column = complex_reader.type()->create_column(); + status = complex_reader.build_nested_column(1, column, &rows_read); + ASSERT_TRUE(status.ok()) << status; + EXPECT_EQ(complex_value_reader_ptr->build_lengths(), std::vector({2})); +} + +TEST(ParquetVirtualColumnReaderTest, RowPositionReadSkipAndInvalidArgs) { + RowPositionColumnReader reader(100); + EXPECT_EQ(reader.file_column_id(), format::ROW_POSITION_COLUMN_ID); + EXPECT_EQ(reader.parquet_leaf_column_id(), -1); + EXPECT_EQ(reader.name(), format::ROW_POSITION_COLUMN_NAME); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + ASSERT_TRUE(reader.read(2, column, &rows_read).ok()); + ASSERT_EQ(rows_read, 2); + ASSERT_TRUE(reader.skip(3).ok()); + ASSERT_TRUE(reader.read(2, column, &rows_read).ok()); + + const auto& values = assert_cast(*column); + ASSERT_EQ(values.size(), 4); + EXPECT_EQ(values.get_element(0), 100); + EXPECT_EQ(values.get_element(1), 101); + EXPECT_EQ(values.get_element(2), 105); + EXPECT_EQ(values.get_element(3), 106); + + MutableColumnPtr null_column; + EXPECT_FALSE(reader.read(1, null_column, &rows_read).ok()); + EXPECT_FALSE(reader.read(-1, column, &rows_read).ok()); + EXPECT_FALSE(reader.read(1, column, nullptr).ok()); +} + +TEST(ParquetVirtualColumnReaderTest, GlobalRowIdReadSkipSelectAndInvalidArgs) { + format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42}; + GlobalRowIdColumnReader reader(context, 10); + EXPECT_EQ(reader.file_column_id(), format::GLOBAL_ROWID_COLUMN_ID); + EXPECT_EQ(reader.parquet_leaf_column_id(), -1); + EXPECT_EQ(reader.name(), BeConsts::GLOBAL_ROWID_COL); + + auto column = reader.type()->create_column(); + int64_t rows_read = 0; + ASSERT_TRUE(reader.read(2, column, &rows_read).ok()); + ASSERT_TRUE(reader.skip(2).ok()); + ASSERT_TRUE(reader.read(1, column, &rows_read).ok()); + + const auto& strings = assert_cast(*column); + ASSERT_EQ(strings.size(), 3); + const auto first = decode_rowid(strings, 0); + EXPECT_EQ(first.version, context.version); + EXPECT_EQ(first.backend_id, context.backend_id); + EXPECT_EQ(first.file_id, context.file_id); + EXPECT_EQ(first.row_id, 10); + EXPECT_EQ(decode_rowid(strings, 1).row_id, 11); + EXPECT_EQ(decode_rowid(strings, 2).row_id, 14); + + GlobalRowIdColumnReader select_reader(context, 20); + SelectionVector selection(2); + selection.set_index(0, 1); + selection.set_index(1, 3); + auto selected_column = select_reader.type()->create_column(); + ASSERT_TRUE(select_reader.select(selection, 2, 5, selected_column).ok()); + const auto& selected_strings = assert_cast(*selected_column); + ASSERT_EQ(selected_strings.size(), 2); + EXPECT_EQ(decode_rowid(selected_strings, 0).row_id, 21); + EXPECT_EQ(decode_rowid(selected_strings, 1).row_id, 23); + + MutableColumnPtr null_column; + EXPECT_FALSE(reader.read(1, null_column, &rows_read).ok()); + EXPECT_FALSE(reader.read(-1, column, &rows_read).ok()); + EXPECT_FALSE(reader.read(1, column, nullptr).ok()); +} + +TEST(ParquetColumnReaderFactoryTest, RejectsInvalidLeafIdBeforeCreatingRecordReader) { + ParquetColumnSchema schema = int64_schema("bad_leaf"); + schema.kind = ParquetColumnSchemaKind::PRIMITIVE; + schema.leaf_column_id = 3; + schema.type_descriptor.physical_type = ::parquet::Type::INT64; + schema.type_descriptor.doris_type = schema.type; + + ParquetColumnReaderFactory factory(nullptr, 1); + std::unique_ptr reader; + const auto status = factory.create(schema, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Invalid parquet leaf column id"), std::string::npos); +} + +TEST(ParquetColumnReaderFactoryTest, RejectsStructInvalidAndEmptyProjection) { + auto schema = struct_schema_for_projection(); + ParquetColumnReaderFactory factory(nullptr, 0); + std::unique_ptr reader; + + auto invalid_projection = format::LocalColumnIndex::partial_local(0); + invalid_projection.children.push_back(format::LocalColumnIndex::local(9)); + auto status = factory.create(schema, &invalid_projection, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("invalid child"), std::string::npos); + + auto empty_projection = format::LocalColumnIndex::partial_local(0); + status = factory.create(schema, &empty_projection, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains no children"), std::string::npos); +} + +TEST(ParquetColumnReaderFactoryTest, RejectsListProjectionWithoutElement) { + auto schema = list_schema_for_projection(); + ParquetColumnReaderFactory factory(nullptr, 0); + std::unique_ptr reader; + + auto projection = format::LocalColumnIndex::partial_local(0); + const auto status = factory.create(schema, &projection, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains no element"), std::string::npos); +} + +TEST(ParquetColumnReaderFactoryTest, RejectsMapInvalidAndKeyOnlyProjection) { + auto schema = map_schema_for_projection(); + ParquetColumnReaderFactory factory(nullptr, 0); + std::unique_ptr reader; + + auto invalid_projection = format::LocalColumnIndex::partial_local(0); + invalid_projection.children.push_back(format::LocalColumnIndex::local(1)); + invalid_projection.children.push_back(format::LocalColumnIndex::local(9)); + auto status = factory.create(schema, &invalid_projection, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("invalid child"), std::string::npos); + + auto key_only_projection = format::LocalColumnIndex::partial_local(0); + key_only_projection.children.push_back(format::LocalColumnIndex::local(0)); + status = factory.create(schema, &key_only_projection, &reader); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("contains no value"), std::string::npos); +} + +} // namespace doris::format::parquet diff --git a/be/test/format_v2/parquet/parquet_reader_test.cpp b/be/test/format_v2/parquet/parquet_reader_test.cpp new file mode 100644 index 00000000000000..ec71ebc614d633 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_reader_test.cpp @@ -0,0 +1,2274 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_reader.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "core/data_type/primitive_type.h" +#include "core/field.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "exprs/vslot_ref.h" +#include "format_v2/column_mapper.h" +#include "format_v2/expr/delete_predicate.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/parquet_scan.h" +#include "format_v2/parquet/reader/column_reader.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/Types_types.h" +#include "io/io_common.h" +#include "runtime/runtime_state.h" +#include "storage/predicate/predicate_creator.h" +#include "storage/segment/condition_cache.h" +#include "storage/utils.h" + +namespace doris { +namespace { + +constexpr int64_t ROW_COUNT = 5; + +format::LocalColumnIndex field_projection(int32_t column_id) { + return format::LocalColumnIndex {.index = column_id}; +} + +template +const ColumnType& nullable_nested_column(const Block& block, size_t position) { + const IColumn* column = block.get_by_position(position).column.get(); + int nullable_depth = 0; + while (const auto* nullable = check_and_get_column(*column)) { + const auto& null_map = nullable->get_null_map_data(); + for (size_t row = 0; row < null_map.size(); ++row) { + EXPECT_EQ(null_map[row], 0) << "Unexpected null at row " << row << ", column position " + << position << ", nullable depth " << nullable_depth; + } + column = &nullable->get_nested_column(); + ++nullable_depth; + } + EXPECT_GT(nullable_depth, 0) << "Expected a nullable file-local column at position " + << position; + return assert_cast(*column); +} + +class Int32GreaterThanExpr final : public VExpr { +public: + Int32GreaterThanExpr(int column_id, int32_t value) + : VExpr(std::make_shared(), false), + _column_id(column_id), + _value(value) {} + + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + const auto& input = nullable_nested_column(*block, _column_id); + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const size_t input_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = input.get_element(input_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + const std::string& expr_name() const override { return _expr_name; } + +private: + const int _column_id; + const int32_t _value; + const std::string _expr_name = "Int32GreaterThanExpr"; +}; + +class Int32SumGreaterThanExpr final : public VExpr { +public: + Int32SumGreaterThanExpr(int left_column_id, int right_column_id, int32_t value) + : VExpr(std::make_shared(), false), + _left_column_id(left_column_id), + _right_column_id(right_column_id), + _value(value) {} + + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + const auto& left_input = nullable_nested_column(*block, _left_column_id); + const auto& right_input = nullable_nested_column(*block, _right_column_id); + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const size_t input_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = + left_input.get_element(input_row) + right_input.get_element(input_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + const std::string& expr_name() const override { return _expr_name; } + +private: + const int _left_column_id; + const int _right_column_id; + const int32_t _value; + const std::string _expr_name = "Int32SumGreaterThanExpr"; +}; + +class StringInExpr final : public VExpr { +public: + StringInExpr(int column_id, std::vector values) + : VExpr(std::make_shared(), false), + _column_id(column_id), + _values(std::move(values)) {} + + Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + const auto& input = nullable_nested_column(*block, _column_id); + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const size_t input_row = selector == nullptr ? row : (*selector)[row]; + const auto value = input.get_data_at(input_row).to_string(); + result_data[row] = std::find(_values.begin(), _values.end(), value) != _values.end(); + } + result_column = std::move(result); + return Status::OK(); + } + + const std::string& expr_name() const override { return _expr_name; } + +private: + const int _column_id; + const std::vector _values; + const std::string _expr_name = "StringInExpr"; +}; + +VExprContextSPtr create_int32_greater_than_conjunct(int column_id, int32_t value) { + auto ctx = + VExprContext::create_shared(std::make_shared(column_id, value)); + ctx->_prepared = true; + ctx->_opened = true; + return ctx; +} + +VExprContextSPtr create_int32_sum_greater_than_conjunct(int left_column_id, int right_column_id, + int32_t value) { + auto ctx = VExprContext::create_shared( + std::make_shared(left_column_id, right_column_id, value)); + ctx->_prepared = true; + ctx->_opened = true; + return ctx; +} + +VExprContextSPtr create_string_in_conjunct(int column_id, std::vector values) { + auto ctx = VExprContext::create_shared( + std::make_shared(column_id, std::move(values))); + ctx->_prepared = true; + ctx->_opened = true; + return ctx; +} + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +std::shared_ptr build_int32_array(const std::vector& values) { + arrow::Int32Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_timestamp_array(const std::shared_ptr& type, + const std::vector& values) { + arrow::TimestampBuilder builder(type, arrow::default_memory_pool()); + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_struct_array(const std::vector& ids, + const std::vector& names) { + auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false), + arrow::field("name", arrow::utf8(), false)}); + std::vector> field_builders; + auto id_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(id_builder))); + auto name_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(name_builder))); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* struct_id_builder = assert_cast(builder.field_builder(0)); + auto* struct_name_builder = assert_cast(builder.field_builder(1)); + for (size_t row = 0; row < ids.size(); ++row) { + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_id_builder->Append(ids[row]).ok()); + EXPECT_TRUE(struct_name_builder->Append(names[row]).ok()); + } + return finish_array(&builder); +} + +void write_parquet_file(const std::string& file_path, int64_t row_group_size = ROW_COUNT) { + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("value", arrow::utf8(), false), + }); + auto table = arrow::Table::Make(schema, + {build_int32_array({1, 2, 3, 4, 5}), + build_string_array({"one", "two", "three", "four", "five"})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + row_group_size, builder.build())); +} + +std::shared_ptr build_nullable_int_string_map_array() { + auto key_builder = std::make_shared(); + auto value_builder = std::make_shared(); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, map_type); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(10).ok()); + EXPECT_TRUE(value_builder->Append("small").ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(20).ok()); + EXPECT_TRUE(value_builder->Append(std::string(4096, 'x')).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(30).ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + return finish_array(&builder); +} + +std::shared_ptr build_nullable_string_list_array() { + auto value_builder = std::make_shared(); + arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder, + arrow::list(arrow::field("element", arrow::utf8(), true))); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->Append("small").ok()); + EXPECT_TRUE(value_builder->Append(std::string(4096, 'a')).ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(value_builder->Append(std::string(4096, 'b')).ok()); + return finish_array(&builder); +} + +std::shared_ptr build_nullable_string_struct_array() { + auto struct_type = arrow::struct_({arrow::field("payload", arrow::utf8(), true), + arrow::field("id", arrow::int32(), false)}); + std::vector> field_builders; + auto payload_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(payload_builder))); + auto id_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(id_builder))); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* struct_payload_builder = assert_cast(builder.field_builder(0)); + auto* struct_id_builder = assert_cast(builder.field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_payload_builder->Append("small").ok()); + EXPECT_TRUE(struct_id_builder->Append(1).ok()); + + EXPECT_TRUE(builder.AppendNull().ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_payload_builder->Append(std::string(4096, 'c')).ok()); + EXPECT_TRUE(struct_id_builder->Append(2).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_payload_builder->AppendNull().ok()); + EXPECT_TRUE(struct_id_builder->Append(3).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_payload_builder->Append(std::string(4096, 'd')).ok()); + EXPECT_TRUE(struct_id_builder->Append(4).ok()); + return finish_array(&builder); +} + +void write_nullable_map_parquet_file(const std::string& file_path) { + auto array = build_nullable_int_string_map_array(); + auto field = arrow::field("arr", array->type(), true); + auto table = arrow::Table::Make(arrow::schema({field}), {array}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ROW_COUNT, builder.build())); +} + +void write_nullable_string_list_parquet_file(const std::string& file_path) { + auto array = build_nullable_string_list_array(); + auto field = arrow::field("arr", array->type(), true); + auto table = arrow::Table::Make(arrow::schema({field}), {array}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ROW_COUNT, builder.build())); +} + +void write_nullable_string_struct_parquet_file(const std::string& file_path) { + auto array = build_nullable_string_struct_array(); + auto field = arrow::field("s", array->type(), true); + auto table = arrow::Table::Make(arrow::schema({field}), {array}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ROW_COUNT, builder.build())); +} + +void write_int96_timestamp_parquet_file(const std::string& file_path) { + auto field = arrow::field("ts_tz", arrow::timestamp(arrow::TimeUnit::MICRO), true); + auto array = + build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO), + {1735660800000000LL, 1735660800123456LL, 1735689600000000LL}); + auto table = arrow::Table::Make(arrow::schema({field}), {array}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + ::parquet::ArrowWriterProperties::Builder arrow_builder; + arrow_builder.enable_force_write_int96_timestamps(); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ROW_COUNT, writer_builder.build(), + arrow_builder.build())); +} + +void write_int_pair_parquet_file(const std::string& file_path, int64_t row_group_size = ROW_COUNT) { + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("score", arrow::int32(), false), + arrow::field("value", arrow::utf8(), false), + }); + auto table = arrow::Table::Make( + schema, {build_int32_array({1, 2, 3, 4, 5}), build_int32_array({1, 2, 3, 4, 5}), + build_string_array({"one", "two", "three", "four", "five"})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + row_group_size, builder.build())); +} + +void write_condition_cache_parquet_file(const std::string& file_path) { + constexpr int64_t row_count = ConditionCacheContext::GRANULE_SIZE * 2; + std::vector ids(row_count); + std::iota(ids.begin(), ids.end(), 0); + + auto schema = arrow::schema({arrow::field("id", arrow::int32(), false)}); + auto table = arrow::Table::Make(schema, {build_int32_array(ids)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + row_count, builder.build())); +} + +void write_struct_filter_parquet_file(const std::string& file_path) { + auto id_field = arrow::field("id", arrow::int32(), false); + auto name_field = arrow::field("name", arrow::utf8(), false); + auto struct_type = arrow::struct_({id_field, name_field}); + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make( + schema, {build_struct_array({1, 2, 10, 11}, {"one", "two", "ten", "eleven"})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2, + builder.build())); +} + +void write_dictionary_filter_parquet_file(const std::string& file_path) { + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("value", arrow::utf8(), false), + }); + auto table = + arrow::Table::Make(schema, {build_int32_array({1, 2, 3, 4, 5, 6}), + build_string_array({"aa", "az", "lm", "lz", "za", "zz"})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + builder.enable_dictionary("value"); + builder.disable_dictionary("id"); + builder.disable_statistics(); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1, + builder.build())); +} + +void write_nested_dictionary_filter_parquet_file(const std::string& file_path) { + auto id_field = arrow::field("id", arrow::int32(), false); + auto name_field = arrow::field("name", arrow::utf8(), false); + auto struct_type = arrow::struct_({id_field, name_field}); + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make( + schema, {build_struct_array({1, 2, 3, 4, 5, 6}, {"aa", "az", "lm", "lz", "za", "zz"})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + builder.enable_dictionary("s.name"); + builder.disable_dictionary("s.identifier.field_id"); + builder.disable_statistics(); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1, + builder.build())); +} + +void write_dictionary_edge_parquet_file(const std::string& file_path) { + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("value", arrow::utf8(), false), + }); + auto table = arrow::Table::Make( + schema, + {build_int32_array({1, 2, 3, 4, 5, 6, 7, 8}), + build_string_array({"", "same", "other", "long-value", "", "tail", "same", "last"})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + builder.enable_dictionary("value"); + builder.disable_dictionary("id"); + builder.disable_statistics(); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2, + builder.build())); +} + +void write_nested_page_index_filter_parquet_file(const std::string& file_path) { + std::vector ids(128); + std::iota(ids.begin(), ids.end(), 0); + std::vector names; + names.reserve(ids.size()); + for (const auto id : ids) { + names.push_back("name-" + std::to_string(id)); + } + auto id_field = arrow::field("id", arrow::int32(), false); + auto name_field = arrow::field("name", arrow::utf8(), false); + auto struct_type = arrow::struct_({id_field, name_field}); + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make(schema, {build_struct_array(ids, names)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + builder.disable_dictionary(); + builder.enable_write_page_index(); + builder.write_batch_size(8); + builder.data_pagesize(10); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ids.size(), builder.build())); +} + +void write_page_index_filter_parquet_file(const std::string& file_path) { + std::vector ids(128); + std::iota(ids.begin(), ids.end(), 0); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + builder.disable_dictionary(); + builder.enable_write_page_index(); + builder.write_batch_size(8); + builder.data_pagesize(10); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ids.size(), builder.build())); +} + +void write_page_index_filter_pair_parquet_file(const std::string& file_path) { + std::vector ids(128); + std::iota(ids.begin(), ids.end(), 0); + std::vector payloads; + payloads.reserve(ids.size()); + for (const auto id : ids) { + payloads.push_back(id + 1000); + } + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("payload", arrow::int32(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(payloads)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + builder.disable_dictionary(); + builder.enable_write_page_index(); + builder.write_batch_size(8); + builder.data_pagesize(10); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + ids.size(), builder.build())); +} + +Block build_file_block(const std::vector& schema) { + Block block; + for (const auto& field : schema) { + block.insert({field.type->create_column(), field.type, field.name}); + } + return block; +} + +Block build_file_block_with_row_position(const std::vector& schema) { + auto block = build_file_block(schema); + const auto row_position_field = format::row_position_column_definition(); + block.insert({row_position_field.type->create_column(), row_position_field.type, + row_position_field.name}); + return block; +} + +void use_schema_order_positions(format::FileScanRequest* request, + const std::vector& schema) { + DORIS_CHECK(request != nullptr); + for (size_t idx = 0; idx < schema.size(); ++idx) { + request->local_positions.emplace(format::LocalColumnId(schema[idx].local_id), + format::LocalIndex(idx)); + } +} + +int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) { + return column_metadata.has_dictionary_page() + ? static_cast(column_metadata.dictionary_page_offset()) + : static_cast(column_metadata.data_page_offset()); +} + +std::pair row_group_mid_range(const std::string& file_path, int row_group_idx) { + auto reader = ::parquet::ParquetFileReader::OpenFile(file_path, false); + auto metadata = reader->metadata(); + auto row_group_metadata = metadata->RowGroup(row_group_idx); + auto first_column = row_group_metadata->ColumnChunk(0); + auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1); + const int64_t row_group_start_offset = parquet_column_start_offset(*first_column); + const int64_t row_group_end_offset = + parquet_column_start_offset(*last_column) + last_column->total_compressed_size(); + const int64_t row_group_mid_offset = + row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2; + return {row_group_mid_offset, 1}; +} + +GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) { + const auto ref = column.get_data_at(row); + EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2)); + GlobalRowLoacationV2 location(0, 0, 0, 0); + std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2)); + return location; +} + +class TestFileReader final : public format::FileReader { +public: + TestFileReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::shared_ptr io_ctx) + : format::FileReader(system_properties, file_description, io_ctx, nullptr) {} + + Status get_schema(std::vector* file_schema) const override { + file_schema->clear(); + format::ColumnDefinition field; + field.identifier = Field::create_field(0); + field.name = "id"; + field.type = std::make_shared(); + file_schema->push_back(std::move(field)); + return Status::OK(); + } + + bool has_request() const { return _request != nullptr; } + + bool eof() const { return _eof; } + + bool has_io_context() const { return _io_ctx != nullptr; } + + long io_context_use_count() const { return _io_ctx.use_count(); } +}; + +TEST(FileReaderTest, OpenStoresRequestAndCloseKeepsRequest) { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto file_description = std::make_unique(); + auto io_ctx = std::make_shared(); + TestFileReader reader(system_properties, file_description, io_ctx); + + auto request = std::make_shared(); + request->non_predicate_columns.push_back(field_projection(0)); + ASSERT_TRUE(reader.open(request).ok()); + EXPECT_NE(request, nullptr); + EXPECT_TRUE(reader.has_request()); + + ASSERT_TRUE(reader.close().ok()); + EXPECT_TRUE(reader.has_request()); + EXPECT_TRUE(reader.eof()); +} + +TEST(FileReaderTest, CloseReleasesSharedIOContext) { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto file_description = std::make_unique(); + auto io_ctx = std::make_shared(); + std::weak_ptr weak_io_ctx = io_ctx; + TestFileReader reader(system_properties, file_description, io_ctx); + + EXPECT_TRUE(reader.has_io_context()); + EXPECT_EQ(reader.io_context_use_count(), 2); + io_ctx.reset(); + EXPECT_FALSE(weak_io_ctx.expired()); + EXPECT_EQ(reader.io_context_use_count(), 1); + + ASSERT_TRUE(reader.close().ok()); + EXPECT_FALSE(reader.has_io_context()); + EXPECT_TRUE(weak_io_ctx.expired()); +} + +class NewParquetReaderTest : public testing::Test { +protected: + void SetUp() override { + _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_parquet_reader_test"; + std::filesystem::remove_all(_test_dir); + std::filesystem::create_directories(_test_dir); + _file_path = (_test_dir / "reader.parquet").string(); + write_parquet_file(_file_path); + } + + void TearDown() override { std::filesystem::remove_all(_test_dir); } + + std::unique_ptr create_reader( + int64_t range_start_offset = 0, int64_t range_size = -1, + RuntimeProfile* profile = nullptr, bool enable_mapping_timestamp_tz = false, + std::shared_ptr io_ctx = nullptr, + std::optional global_rowid_context = std::nullopt) const { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto file_description = std::make_unique(); + file_description->path = _file_path; + file_description->file_size = static_cast(std::filesystem::file_size(_file_path)); + file_description->range_start_offset = range_start_offset; + file_description->range_size = range_size; + return std::make_unique( + system_properties, file_description, std::move(io_ctx), profile, + global_rowid_context, enable_mapping_timestamp_tz); + } + + std::filesystem::path _test_dir; + std::string _file_path; +}; + +TEST_F(NewParquetReaderTest, GetSchemaReturnsFileLocalColumns) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 2); + EXPECT_EQ(schema[0].local_id, 0); + EXPECT_EQ(schema[0].name, "id"); + ASSERT_TRUE(schema[0].type->is_nullable()); + EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_INT); + EXPECT_EQ(schema[1].local_id, 1); + EXPECT_EQ(schema[1].name, "value"); + ASSERT_TRUE(schema[1].type->is_nullable()); + EXPECT_EQ(remove_nullable(schema[1].type)->get_primitive_type(), TYPE_STRING); +} + +// Scenario: Parquet is columnar and supports predicate/non-predicate split, nested projection and +// file-layer pruning hints. The reader declares those scan-request capabilities by choosing +// ParquetColumnMapper itself. +TEST_F(NewParquetReaderTest, CreatesParquetColumnMapper) { + auto reader = create_reader(); + auto mapper = + reader->create_column_mapper({.mode = format::TableColumnMappingMode::BY_FIELD_ID}); + + ASSERT_NE(dynamic_cast(mapper.get()), nullptr); +} + +TEST_F(NewParquetReaderTest, CountComplexColumnUsesShapeOnlyPath) { + write_nullable_map_parquet_file(_file_path); + RuntimeProfile profile("count_map_shape_only_path"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + ASSERT_TRUE(reader->open(std::make_shared()).ok()); + + format::FileAggregateRequest request; + request.agg_type = TPushAggOp::type::COUNT; + request.columns.push_back( + {.projection = format::LocalColumnIndex::top_level(format::LocalColumnId(0))}); + format::FileAggregateResult result; + ASSERT_TRUE(reader->get_aggregate_result(request, &result).ok()); + + // Rows are: non-empty map, NULL map, empty map, non-empty map with large value string, + // non-empty map with NULL value. COUNT(arr) excludes only the top-level NULL map. + EXPECT_EQ(result.count, 4); + ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr); + EXPECT_EQ(profile.get_counter("MaterializationTime")->value(), 0); +} + +TEST_F(NewParquetReaderTest, CountArrayColumnUsesLevelsOnlyPath) { + write_nullable_string_list_parquet_file(_file_path); + RuntimeProfile profile("count_array_levels_only_path"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + ASSERT_TRUE(reader->open(std::make_shared()).ok()); + + format::FileAggregateRequest request; + request.agg_type = TPushAggOp::type::COUNT; + request.columns.push_back( + {.projection = format::LocalColumnIndex::top_level(format::LocalColumnId(0))}); + format::FileAggregateResult result; + ASSERT_TRUE(reader->get_aggregate_result(request, &result).ok()); + + // Rows are: non-empty array with a large string, NULL array, empty array, non-empty array + // with NULL element, non-empty array with a large string. Only the top-level NULL is excluded. + EXPECT_EQ(result.count, 4); + ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr); + EXPECT_EQ(profile.get_counter("MaterializationTime")->value(), 0); +} + +TEST_F(NewParquetReaderTest, CountStructColumnUsesLevelsOnlyPath) { + write_nullable_string_struct_parquet_file(_file_path); + RuntimeProfile profile("count_struct_levels_only_path"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + ASSERT_TRUE(reader->open(std::make_shared()).ok()); + + format::FileAggregateRequest request; + request.agg_type = TPushAggOp::type::COUNT; + request.columns.push_back( + {.projection = format::LocalColumnIndex::top_level(format::LocalColumnId(0))}); + format::FileAggregateResult result; + ASSERT_TRUE(reader->get_aggregate_result(request, &result).ok()); + + // The representative STRUCT leaf is the first child, a nullable STRING payload. A row with + // NULL payload but non-NULL struct still counts; only the top-level NULL struct is excluded. + EXPECT_EQ(result.count, 4); + ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr); + EXPECT_EQ(profile.get_counter("MaterializationTime")->value(), 0); +} + +TEST_F(NewParquetReaderTest, GetSchemaReturnsNullableNestedChildren) { + write_struct_filter_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 1); + EXPECT_EQ(schema[0].name, "s"); + ASSERT_TRUE(schema[0].type->is_nullable()); + ASSERT_EQ(schema[0].children.size(), 2); + EXPECT_EQ(schema[0].children[0].name, "id"); + ASSERT_TRUE(schema[0].children[0].type->is_nullable()); + EXPECT_EQ(remove_nullable(schema[0].children[0].type)->get_primitive_type(), TYPE_INT); + EXPECT_EQ(schema[0].children[1].name, "name"); + ASSERT_TRUE(schema[0].children[1].type->is_nullable()); + EXPECT_EQ(remove_nullable(schema[0].children[1].type)->get_primitive_type(), TYPE_STRING); + + const auto* struct_type = + assert_cast(remove_nullable(schema[0].type).get()); + ASSERT_EQ(struct_type->get_elements().size(), 2); + EXPECT_TRUE(struct_type->get_element(0)->is_nullable()); + EXPECT_TRUE(struct_type->get_element(1)->is_nullable()); +} + +TEST_F(NewParquetReaderTest, GetSchemaMapsInt96ToTimestampTzWhenTimestampTzMappingEnabled) { + write_int96_timestamp_parquet_file(_file_path); + auto reader = create_reader(0, -1, nullptr, true); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 1); + EXPECT_EQ(schema[0].name, "ts_tz"); + ASSERT_TRUE(schema[0].type->is_nullable()); + EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_TIMESTAMPTZ); + EXPECT_EQ(remove_nullable(schema[0].type)->get_scale(), 6); +} + +TEST_F(NewParquetReaderTest, ReadSingleRowGroupThenEof) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0), field_projection(1)}; + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, ROW_COUNT); + + const auto& ids = nullable_nested_column(block, 0); + const auto& values = nullable_nested_column(block, 1); + ASSERT_EQ(ids.size(), ROW_COUNT); + ASSERT_EQ(values.size(), ROW_COUNT); + EXPECT_EQ(ids.get_element(0), 1); + EXPECT_EQ(ids.get_element(4), 5); + EXPECT_EQ(values.get_data_at(0).to_string(), "one"); + EXPECT_EQ(values.get_data_at(4).to_string(), "five"); + + rows = 0; + eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_TRUE(eof); + EXPECT_EQ(rows, 0); +} + +TEST_F(NewParquetReaderTest, RespectsConfiguredBatchSize) { + auto reader = create_reader(); + reader->set_batch_size(1); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0), field_projection(1)}; + ASSERT_TRUE(reader->open(request).ok()); + + for (int32_t expected_id = 1; expected_id <= ROW_COUNT; ++expected_id) { + Block block = build_file_block(schema); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 1); + const auto& ids = nullable_nested_column(block, 0); + ASSERT_EQ(ids.size(), 1); + EXPECT_EQ(ids.get_element(0), expected_id); + } + + Block block = build_file_block(schema); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_TRUE(eof); + EXPECT_EQ(rows, 0); +} + +TEST_F(NewParquetReaderTest, ConditionCacheMissMarksSurvivingGranules) { + write_condition_cache_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 1); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->conjuncts.push_back( + create_int32_greater_than_conjunct(0, ConditionCacheContext::GRANULE_SIZE - 1)); + use_schema_order_positions(request.get(), schema); + ASSERT_TRUE(reader->open(request).ok()); + + auto ctx = std::make_shared(); + ctx->is_hit = false; + ctx->filter_result = std::make_shared>(3, false); + reader->set_condition_cache_context(ctx); + + std::vector ids; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + } + } + + ASSERT_EQ(ids.size(), ConditionCacheContext::GRANULE_SIZE); + EXPECT_EQ(ids.front(), ConditionCacheContext::GRANULE_SIZE); + EXPECT_EQ(ids.back(), ConditionCacheContext::GRANULE_SIZE * 2 - 1); + EXPECT_FALSE((*ctx->filter_result)[0]); + EXPECT_TRUE((*ctx->filter_result)[1]); + EXPECT_FALSE((*ctx->filter_result)[2]); +} + +TEST_F(NewParquetReaderTest, ConditionCacheHitSkipsFalseGranulesBeforeColumnRead) { + write_condition_cache_parquet_file(_file_path); + auto io_ctx = std::make_shared(); + auto reader = create_reader(0, -1, nullptr, false, io_ctx); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 1); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->conjuncts.push_back( + create_int32_greater_than_conjunct(0, ConditionCacheContext::GRANULE_SIZE - 1)); + use_schema_order_positions(request.get(), schema); + ASSERT_TRUE(reader->open(request).ok()); + + auto ctx = std::make_shared(); + ctx->is_hit = true; + ctx->filter_result = + std::make_shared>(std::vector {false, true, false}); + reader->set_condition_cache_context(ctx); + + Block block = build_file_block(schema); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, ConditionCacheContext::GRANULE_SIZE); + EXPECT_EQ(io_ctx->condition_cache_filtered_rows, ConditionCacheContext::GRANULE_SIZE); + + const auto& ids = nullable_nested_column(block, 0); + EXPECT_EQ(ids.get_element(0), ConditionCacheContext::GRANULE_SIZE); + EXPECT_EQ(ids.get_element(rows - 1), ConditionCacheContext::GRANULE_SIZE * 2 - 1); + + block = build_file_block(schema); + rows = 0; + eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_TRUE(eof); + EXPECT_EQ(rows, 0); +} + +TEST_F(NewParquetReaderTest, ReadMultipleRowGroups) { + write_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0), field_projection(1)}; + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector values; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& value_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + values.push_back(value_column.get_data_at(row).to_string()); + } + } + + EXPECT_EQ(ids, std::vector({1, 2, 3, 4, 5})); + EXPECT_EQ(values, std::vector({"one", "two", "three", "four", "five"})); +} + +TEST_F(NewParquetReaderTest, RewriteSameLocalPathDoesNotReuseUnknownMtimePageCache) { + RuntimeProfile first_profile("new_parquet_reader_first_unknown_mtime"); + { + auto reader = create_reader(0, -1, &first_profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0), field_projection(1)}; + ASSERT_TRUE(reader->open(request).ok()); + + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + } + } + + ASSERT_NE(first_profile.get_counter("PageReadCount"), nullptr); + ASSERT_NE(first_profile.get_counter("PageCacheWriteCount"), nullptr); + EXPECT_EQ(first_profile.get_counter("PageReadCount")->value(), 0); + EXPECT_EQ(first_profile.get_counter("PageCacheWriteCount")->value(), 0); + + // LocalFileReader reports mtime as 0. Rewriting the same path must not reuse page-cache bytes + // from the previous physical file, even when the query option enables parquet file page cache. + write_int_pair_parquet_file(_file_path); + RuntimeProfile second_profile("new_parquet_reader_second_unknown_mtime"); + auto reader = create_reader(0, -1, &second_profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0), field_projection(1)}; + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector scores; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& score_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + scores.push_back(score_column.get_element(row)); + } + } + + EXPECT_EQ(ids, std::vector({1, 2, 3, 4, 5})); + EXPECT_EQ(scores, std::vector({1, 2, 3, 4, 5})); + ASSERT_NE(second_profile.get_counter("PageReadCount"), nullptr); + ASSERT_NE(second_profile.get_counter("PageCacheWriteCount"), nullptr); + EXPECT_EQ(second_profile.get_counter("PageReadCount")->value(), 0); + EXPECT_EQ(second_profile.get_counter("PageCacheWriteCount")->value(), 0); +} + +TEST_F(NewParquetReaderTest, ReadPredicateAndNonPredicateColumnsWithSelection) { + RuntimeProfile profile("new_parquet_reader_filter_profile"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(2), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 3); + + const auto& ids = nullable_nested_column(block, 0); + const auto& values = nullable_nested_column(block, 1); + ASSERT_EQ(ids.size(), 3); + ASSERT_EQ(values.size(), 3); + EXPECT_EQ(ids.get_element(0), 3); + EXPECT_EQ(ids.get_element(1), 4); + EXPECT_EQ(ids.get_element(2), 5); + EXPECT_EQ(values.get_data_at(0).to_string(), "three"); + EXPECT_EQ(values.get_data_at(1).to_string(), "four"); + EXPECT_EQ(values.get_data_at(2).to_string(), "five"); + + ASSERT_NE(profile.get_counter("FileReaderCreateTime"), nullptr); + ASSERT_NE(profile.get_counter("FileNum"), nullptr); + ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr); + ASSERT_NE(profile.get_counter("SelectedRows"), nullptr); + ASSERT_NE(profile.get_counter("RowsFilteredByConjunct"), nullptr); + ASSERT_NE(profile.get_counter("TotalBatches"), nullptr); + ASSERT_NE(profile.get_counter("EmptySelectionBatches"), nullptr); + ASSERT_NE(profile.get_counter("ReaderReadRows"), nullptr); + ASSERT_NE(profile.get_counter("ReaderSkipRows"), nullptr); + ASSERT_NE(profile.get_counter("ReaderSelectRows"), nullptr); + ASSERT_NE(profile.get_counter("ArrowReadRecordsTime"), nullptr); + ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr); + ASSERT_GT(profile.get_counter("FileReaderCreateTime")->value(), 0); + EXPECT_EQ(profile.get_counter("FileNum")->value(), 1); + EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), ROW_COUNT); + EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 3); + EXPECT_EQ(profile.get_counter("RowsFilteredByConjunct")->value(), 2); + EXPECT_EQ(profile.get_counter("TotalBatches")->value(), 1); + EXPECT_EQ(profile.get_counter("EmptySelectionBatches")->value(), 0); + EXPECT_EQ(profile.get_counter("ReaderReadRows")->value(), ROW_COUNT + 3); + EXPECT_EQ(profile.get_counter("ReaderSkipRows")->value(), 2); + EXPECT_EQ(profile.get_counter("ReaderSelectRows")->value(), 3); + EXPECT_GT(profile.get_counter("ArrowReadRecordsTime")->value(), 0); + EXPECT_GT(profile.get_counter("MaterializationTime")->value(), 0); + + rows = 0; + eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_TRUE(eof); + EXPECT_EQ(rows, 0); +} + +TEST_F(NewParquetReaderTest, GlobalRowIdSchemaAndSelectionUseFileRowPosition) { + format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42}; + auto reader = create_reader(0, -1, nullptr, false, nullptr, context); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + EXPECT_EQ(schema[2].local_id, format::GLOBAL_ROWID_COLUMN_ID); + EXPECT_EQ(schema[2].column_type, format::GLOBAL_ROWID); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1), + field_projection(format::GLOBAL_ROWID_COLUMN_ID)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + use_schema_order_positions(request.get(), schema); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 3); + + const auto& ids = nullable_nested_column(block, 0); + const auto& values = nullable_nested_column(block, 1); + const auto& rowids = assert_cast(*block.get_by_position(2).column); + ASSERT_EQ(ids.size(), 3); + ASSERT_EQ(values.size(), 3); + ASSERT_EQ(rowids.size(), 3); + EXPECT_EQ(ids.get_element(0), 3); + EXPECT_EQ(ids.get_element(1), 4); + EXPECT_EQ(ids.get_element(2), 5); + EXPECT_EQ(values.get_data_at(0).to_string(), "three"); + EXPECT_EQ(values.get_data_at(1).to_string(), "four"); + EXPECT_EQ(values.get_data_at(2).to_string(), "five"); + + for (size_t row = 0; row < rows; ++row) { + const auto location = decode_rowid(rowids, row); + EXPECT_EQ(location.version, context.version); + EXPECT_EQ(location.backend_id, context.backend_id); + EXPECT_EQ(location.file_id, context.file_id); + EXPECT_EQ(location.row_id, static_cast(row + 2)); + } +} + +TEST_F(NewParquetReaderTest, ColumnPredicateOnlyPrunesAndDoesNotFilterRowsInsideRowGroup) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(2), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, ROW_COUNT); + + const auto& ids = nullable_nested_column(block, 0); + const auto& values = nullable_nested_column(block, 1); + ASSERT_EQ(ids.size(), ROW_COUNT); + ASSERT_EQ(values.size(), ROW_COUNT); + EXPECT_EQ(ids.get_element(0), 1); + EXPECT_EQ(ids.get_element(4), 5); + EXPECT_EQ(values.get_data_at(0).to_string(), "one"); + EXPECT_EQ(values.get_data_at(4).to_string(), "five"); +} + +TEST_F(NewParquetReaderTest, EmptySelectionUpdatesProfileCounters) { + RuntimeProfile profile("new_parquet_reader_empty_selection_profile"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 10)); + use_schema_order_positions(request.get(), schema); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_TRUE(eof); + EXPECT_EQ(rows, 0); + + ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr); + ASSERT_NE(profile.get_counter("SelectedRows"), nullptr); + ASSERT_NE(profile.get_counter("RowsFilteredByConjunct"), nullptr); + ASSERT_NE(profile.get_counter("TotalBatches"), nullptr); + ASSERT_NE(profile.get_counter("EmptySelectionBatches"), nullptr); + EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), ROW_COUNT); + EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 0); + EXPECT_EQ(profile.get_counter("RowsFilteredByConjunct")->value(), ROW_COUNT); + EXPECT_EQ(profile.get_counter("TotalBatches")->value(), 1); + EXPECT_EQ(profile.get_counter("EmptySelectionBatches")->value(), 1); +} + +TEST_F(NewParquetReaderTest, ReadMultiPredicateColumnsBeforeExpressionFilter) { + write_int_pair_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0), field_projection(1)}; + request->non_predicate_columns = {}; + request->conjuncts.push_back(create_int32_sum_greater_than_conjunct(0, 1, 7)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 2); + + const auto& ids = nullable_nested_column(block, 0); + const auto& scores = nullable_nested_column(block, 1); + ASSERT_EQ(ids.size(), 2); + ASSERT_EQ(scores.size(), 2); + EXPECT_EQ(ids.get_element(0), 4); + EXPECT_EQ(ids.get_element(1), 5); + EXPECT_EQ(scores.get_element(0), 4); + EXPECT_EQ(scores.get_element(1), 5); +} + +TEST_F(NewParquetReaderTest, PredicateColumnFiltersBeforeNonPredicateRead) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 3); + + const auto& ids = nullable_nested_column(block, 0); + const auto& values = nullable_nested_column(block, 1); + ASSERT_EQ(ids.size(), 3); + ASSERT_EQ(values.size(), 3); + EXPECT_EQ(ids.get_element(0), 3); + EXPECT_EQ(ids.get_element(1), 4); + EXPECT_EQ(ids.get_element(2), 5); + EXPECT_EQ(values.get_data_at(0).to_string(), "three"); + EXPECT_EQ(values.get_data_at(1).to_string(), "four"); + EXPECT_EQ(values.get_data_at(2).to_string(), "five"); +} + +TEST_F(NewParquetReaderTest, NonPredicateColumnKeepsSelectionFromPredicateColumn) { + write_int_pair_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 3); + + const auto& ids = nullable_nested_column(block, 0); + const auto& scores = nullable_nested_column(block, 1); + ASSERT_EQ(ids.size(), 3); + ASSERT_EQ(scores.size(), 3); + EXPECT_EQ(ids.get_element(0), 3); + EXPECT_EQ(ids.get_element(1), 4); + EXPECT_EQ(ids.get_element(2), 5); + EXPECT_EQ(scores.get_element(0), 3); + EXPECT_EQ(scores.get_element(1), 4); + EXPECT_EQ(scores.get_element(2), 5); +} + +TEST_F(NewParquetReaderTest, PredicateFiltersRowGroupsByStatistics) { + write_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(2), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector values; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& value_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + values.push_back(value_column.get_data_at(row).to_string()); + } + } + + EXPECT_EQ(ids, std::vector({3, 4, 5})); + EXPECT_EQ(values, std::vector({"three", "four", "five"})); +} + +TEST_F(NewParquetReaderTest, PredicateFiltersRowGroupsByDictionary) { + write_dictionary_filter_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6); + for (int row_group_idx = 0; row_group_idx < 6; ++row_group_idx) { + auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx); + ASSERT_NE(row_group, nullptr); + auto value_chunk = row_group->ColumnChunk(1); + ASSERT_NE(value_chunk, nullptr); + ASSERT_TRUE(value_chunk->has_dictionary_page()); + ASSERT_TRUE(value_chunk->statistics() == nullptr || + !value_chunk->statistics()->HasMinMax()); + } + + std::vector> file_schema; + auto schema_descriptor = parquet_file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + ASSERT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + ASSERT_EQ(file_schema.size(), 2); + + format::FileScanRequest plan_request; + format::FileColumnPredicateFilter plan_column_filter; + plan_column_filter.file_column_id = format::LocalColumnId(1); + auto value_type = std::make_shared(); + plan_column_filter.predicates.push_back(create_comparison_predicate( + 1, "value", value_type, Field::create_field("lm"), false)); + plan_request.column_predicate_filters.push_back(std::move(plan_column_filter)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + plan_request, scan_range, false, &plan) + .ok()); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 6); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 5); + EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 5); + EXPECT_EQ(plan.pruning_stats.selected_row_ranges, 1); + + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->predicate_columns = {field_projection(1)}; + request->non_predicate_columns = {field_projection(0)}; + request->conjuncts.push_back(create_string_in_conjunct(1, {"lm"})); + use_schema_order_positions(request.get(), schema); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(1); + column_filter.predicates.push_back(create_comparison_predicate( + 1, "value", schema[1].type, Field::create_field("lm"), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector values; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& value_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + values.push_back(value_column.get_data_at(row).to_string()); + } + } + + EXPECT_EQ(ids, std::vector({3})); + EXPECT_EQ(values, std::vector({"lm"})); +} + +TEST_F(NewParquetReaderTest, ScanRangeFiltersRowGroupsBeforeDictionaryPruning) { + write_dictionary_filter_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6); + + std::vector> file_schema; + auto schema_descriptor = parquet_file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + ASSERT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + + format::FileScanRequest request; + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(1); + auto value_type = std::make_shared(); + column_filter.predicates.push_back(create_comparison_predicate( + 1, "value", value_type, Field::create_field("lm"), false)); + request.column_predicate_filters.push_back(std::move(column_filter)); + + const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 2); + format::parquet::ParquetScanRange scan_range; + scan_range.start_offset = range_start_offset; + scan_range.size = range_size; + scan_range.file_size = static_cast(std::filesystem::file_size(_file_path)); + + format::parquet::RowGroupScanPlan plan; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + ASSERT_EQ(plan.row_groups.size(), 1); + EXPECT_EQ(plan.row_groups[0].row_group_id, 2); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 6); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 0); + EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 0); +} + +TEST_F(NewParquetReaderTest, NestedStructPredicateFiltersRowGroupsByStatistics) { + write_struct_filter_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 2); + + std::vector> file_schema; + auto schema_descriptor = parquet_file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + ASSERT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + ASSERT_EQ(file_schema.size(), 1); + ASSERT_EQ(file_schema[0]->children.size(), 2); + ASSERT_EQ(file_schema[0]->children[0]->name, "id"); + + format::FileScanRequest request; + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.file_child_id_path = {0}; + auto id_type = std::make_shared(); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", id_type, Field::create_field(5), false)); + request.column_predicate_filters.push_back(std::move(column_filter)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + ASSERT_EQ(plan.row_groups.size(), 1); + EXPECT_EQ(plan.row_groups[0].row_group_id, 1); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 2); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 1); + EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 2); +} + +TEST_F(NewParquetReaderTest, NestedStructPredicateFiltersRowGroupsByDictionary) { + write_nested_dictionary_filter_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6); + for (int row_group_idx = 0; row_group_idx < 6; ++row_group_idx) { + auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx); + ASSERT_NE(row_group, nullptr); + auto name_chunk = row_group->ColumnChunk(1); + ASSERT_NE(name_chunk, nullptr); + ASSERT_TRUE(name_chunk->has_dictionary_page()); + ASSERT_TRUE(name_chunk->statistics() == nullptr || !name_chunk->statistics()->HasMinMax()); + } + + std::vector> file_schema; + auto schema_descriptor = parquet_file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + ASSERT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + ASSERT_EQ(file_schema.size(), 1); + ASSERT_EQ(file_schema[0]->children.size(), 2); + ASSERT_EQ(file_schema[0]->children[1]->name, "name"); + + format::FileScanRequest request; + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.file_child_id_path = {1}; + auto name_type = std::make_shared(); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "name", name_type, Field::create_field("lm"), false)); + request.column_predicate_filters.push_back(std::move(column_filter)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + ASSERT_EQ(plan.row_groups.size(), 1); + EXPECT_EQ(plan.row_groups[0].row_group_id, 2); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 6); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 5); + EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 5); +} + +TEST_F(NewParquetReaderTest, PlannerNarrowsRowRangesByPageIndex) { + write_page_index_filter_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1); + auto page_index_reader = parquet_file_reader->GetPageIndexReader(); + ASSERT_NE(page_index_reader, nullptr); + auto row_group_index_reader = page_index_reader->RowGroup(0); + ASSERT_NE(row_group_index_reader, nullptr); + auto offset_index = row_group_index_reader->GetOffsetIndex(0); + ASSERT_NE(offset_index, nullptr); + ASSERT_GT(offset_index->page_locations().size(), 1); + + std::vector> file_schema; + auto schema_descriptor = parquet_file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + ASSERT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + ASSERT_EQ(file_schema.size(), 1); + + format::FileScanRequest request; + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + auto id_type = std::make_shared(); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", id_type, Field::create_field(63), false)); + request.column_predicate_filters.push_back(std::move(column_filter)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + ASSERT_EQ(plan.row_groups.size(), 1); + ASSERT_FALSE(plan.row_groups[0].selected_ranges.empty()); + EXPECT_GT(plan.row_groups[0].selected_ranges.front().start, 0); + EXPECT_LT(plan.row_groups[0].selected_ranges.front().length, 128); + auto skip_plan_it = plan.row_groups[0].page_skip_plans.find(0); + ASSERT_NE(skip_plan_it, plan.row_groups[0].page_skip_plans.end()); + EXPECT_EQ(skip_plan_it->second.leaf_column_id, 0); + EXPECT_GT(skip_plan_it->second.skipped_ranges.size(), 0); + EXPECT_GT(skip_plan_it->second.skipped_pages.size(), 1); + ASSERT_EQ(skip_plan_it->second.skipped_pages.size(), + skip_plan_it->second.skipped_page_compressed_sizes.size()); + int64_t skipped_compressed_bytes = 0; + for (size_t page_idx = 0; page_idx < skip_plan_it->second.skipped_pages.size(); ++page_idx) { + if (skip_plan_it->second.should_skip_page(page_idx)) { + skipped_compressed_bytes += skip_plan_it->second.skipped_page_compressed_size(page_idx); + } + } + EXPECT_GT(skipped_compressed_bytes, 0); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 0); + EXPECT_GT(plan.pruning_stats.filtered_page_rows, 0); + EXPECT_EQ(plan.pruning_stats.selected_row_ranges, plan.row_groups[0].selected_ranges.size()); +} + +TEST_F(NewParquetReaderTest, NestedStructPredicateNarrowsRowRangesByPageIndex) { + write_nested_page_index_filter_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1); + auto page_index_reader = parquet_file_reader->GetPageIndexReader(); + ASSERT_NE(page_index_reader, nullptr); + auto row_group_index_reader = page_index_reader->RowGroup(0); + ASSERT_NE(row_group_index_reader, nullptr); + auto offset_index = row_group_index_reader->GetOffsetIndex(0); + ASSERT_NE(offset_index, nullptr); + ASSERT_GT(offset_index->page_locations().size(), 1); + + std::vector> file_schema; + auto schema_descriptor = parquet_file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + ASSERT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + ASSERT_EQ(file_schema.size(), 1); + ASSERT_EQ(file_schema[0]->children.size(), 2); + ASSERT_EQ(file_schema[0]->children[0]->name, "id"); + + format::FileScanRequest request; + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.file_child_id_path = {0}; + auto id_type = std::make_shared(); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", id_type, Field::create_field(63), false)); + request.column_predicate_filters.push_back(std::move(column_filter)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + ASSERT_EQ(plan.row_groups.size(), 1); + ASSERT_FALSE(plan.row_groups[0].selected_ranges.empty()); + EXPECT_GT(plan.row_groups[0].selected_ranges.front().start, 0); + EXPECT_LT(plan.row_groups[0].selected_ranges.front().length, 128); + auto skip_plan_it = plan.row_groups[0].page_skip_plans.find(0); + ASSERT_NE(skip_plan_it, plan.row_groups[0].page_skip_plans.end()); + EXPECT_EQ(skip_plan_it->second.leaf_column_id, 0); + EXPECT_GT(skip_plan_it->second.skipped_ranges.size(), 0); + EXPECT_GT(skip_plan_it->second.skipped_pages.size(), 1); + ASSERT_EQ(skip_plan_it->second.skipped_pages.size(), + skip_plan_it->second.skipped_page_compressed_sizes.size()); + int64_t skipped_compressed_bytes = 0; + for (size_t page_idx = 0; page_idx < skip_plan_it->second.skipped_pages.size(); ++page_idx) { + if (skip_plan_it->second.should_skip_page(page_idx)) { + skipped_compressed_bytes += skip_plan_it->second.skipped_page_compressed_size(page_idx); + } + } + EXPECT_GT(skipped_compressed_bytes, 0); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 0); + EXPECT_GT(plan.pruning_stats.filtered_page_rows, 0); + EXPECT_EQ(plan.pruning_stats.selected_row_ranges, plan.row_groups[0].selected_ranges.size()); +} + +TEST_F(NewParquetReaderTest, PageIndexFilteredPagesDoNotDoubleSkipOutputColumns) { + write_page_index_filter_pair_parquet_file(_file_path); + RuntimeProfile profile("new_parquet_reader_page_skip"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 2); + Block block = build_file_block(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 63)); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(63), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector payloads; + bool eof = false; + while (!eof) { + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& payload_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + payloads.push_back(payload_column.get_element(row)); + } + } + + ASSERT_NE(profile.get_counter("PagesSkippedByDataPageFilter"), nullptr); + ASSERT_NE(profile.get_counter("DataPageFilterSkipBytes"), nullptr); + ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr); + ASSERT_NE(profile.get_counter("SelectedRows"), nullptr); + ASSERT_NE(profile.get_counter("RangeGapSkippedRows"), nullptr); + ASSERT_NE(profile.get_counter("ReaderSkipRows"), nullptr); + ASSERT_NE(profile.get_counter("RowGroupFilterTime"), nullptr); + ASSERT_NE(profile.get_counter("PageIndexFilterTime"), nullptr); + ASSERT_NE(profile.get_counter("PageIndexReadTime"), nullptr); + EXPECT_GT(profile.get_counter("PagesSkippedByDataPageFilter")->value(), 0); + EXPECT_GT(profile.get_counter("DataPageFilterSkipBytes")->value(), 0); + EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), 64); + EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 64); + EXPECT_GT(profile.get_counter("RangeGapSkippedRows")->value(), 0); + EXPECT_EQ(profile.get_counter("ReaderSkipRows")->value(), 0); + EXPECT_GT(profile.get_counter("RowGroupFilterTime")->value(), 0); + EXPECT_GT(profile.get_counter("PageIndexFilterTime")->value(), 0); + EXPECT_GT(profile.get_counter("PageIndexReadTime")->value(), 0); + + ASSERT_EQ(ids.size(), 64); + ASSERT_EQ(payloads.size(), ids.size()); + for (size_t row = 0; row < ids.size(); ++row) { + EXPECT_EQ(ids[row], static_cast(row + 64)); + EXPECT_EQ(payloads[row], ids[row] + 1000); + } +} + +TEST_F(NewParquetReaderTest, InPredicateFiltersRowGroupsByDictionary) { + write_dictionary_filter_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->predicate_columns = {field_projection(1)}; + request->non_predicate_columns = {field_projection(0)}; + request->conjuncts.push_back(create_string_in_conjunct(1, {"az", "za"})); + use_schema_order_positions(request.get(), schema); + auto set = build_set(); + set->insert(const_cast("az"), 2); + set->insert(const_cast("za"), 2); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(1); + column_filter.predicates.push_back(create_in_list_predicate( + 1, "value", schema[1].type, set, false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector values; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& value_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + values.push_back(value_column.get_data_at(row).to_string()); + } + } + + EXPECT_EQ(ids, std::vector({2, 5})); + EXPECT_EQ(values, std::vector({"az", "za"})); +} + +TEST_F(NewParquetReaderTest, DictionaryPageV2StringEdgesSurviveSelection) { + write_dictionary_edge_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 4); + for (int row_group_idx = 0; row_group_idx < 4; ++row_group_idx) { + auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx); + ASSERT_NE(row_group, nullptr); + ASSERT_TRUE(row_group->ColumnChunk(1)->has_dictionary_page()); + } + + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->predicate_columns = {field_projection(1)}; + request->non_predicate_columns = {field_projection(0)}; + request->conjuncts.push_back(create_string_in_conjunct(1, {"", "same"})); + use_schema_order_positions(request.get(), schema); + auto set = build_set(); + set->insert(const_cast(""), 0); + set->insert(const_cast("same"), 4); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(1); + column_filter.predicates.push_back(create_in_list_predicate( + 1, "value", schema[1].type, set, false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector values; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& value_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + values.push_back(value_column.get_data_at(row).to_string()); + } + } + + EXPECT_EQ(ids, std::vector({1, 2, 5, 7})); + EXPECT_EQ(values, std::vector({"", "same", "", "same"})); +} + +TEST_F(NewParquetReaderTest, StatisticsPruningSkipsPrefixRowGroupsAndReadsLaterGroups) { + write_parquet_file(_file_path, 1); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 5); + + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(1)}; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 3)); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(4), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector values; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& value_column = nullable_nested_column(block, 1); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + values.push_back(value_column.get_data_at(row).to_string()); + } + } + + EXPECT_EQ(ids, std::vector({4, 5})); + EXPECT_EQ(values, std::vector({"four", "five"})); +} + +TEST_F(NewParquetReaderTest, RowPositionReaderReturnsFileLocalPositions) { + write_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID), + field_projection(0)}; + request->local_positions = { + {format::LocalColumnId(0), format::LocalIndex(0)}, + {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)}, + }; + ASSERT_TRUE(reader->open(request).ok()); + + std::vector row_positions; + std::vector ids; + bool eof = false; + while (!eof) { + Block block = build_file_block_with_row_position(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& row_position_column = + assert_cast(*block.get_by_position(2).column); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + row_positions.push_back(row_position_column.get_element(row)); + } + } + + EXPECT_EQ(ids, std::vector({1, 2, 3, 4, 5})); + EXPECT_EQ(row_positions, std::vector({0, 1, 2, 3, 4})); +} + +TEST_F(NewParquetReaderTest, RowPositionReaderKeepsPositionsAfterSelection) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block_with_row_position(schema); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0)}; + request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID)}; + request->local_positions = { + {format::LocalColumnId(0), format::LocalIndex(0)}, + {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)}, + }; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 3); + + const auto& id_column = nullable_nested_column(block, 0); + const auto& row_position_column = + assert_cast(*block.get_by_position(2).column); + EXPECT_EQ(id_column.get_element(0), 3); + EXPECT_EQ(id_column.get_element(1), 4); + EXPECT_EQ(id_column.get_element(2), 5); + EXPECT_EQ(row_position_column.get_element(0), 2); + EXPECT_EQ(row_position_column.get_element(1), 3); + EXPECT_EQ(row_position_column.get_element(2), 4); +} + +TEST_F(NewParquetReaderTest, DeletePredicateFiltersRowPositions) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block_with_row_position(schema); + + static const std::vector deleted_rows {1, 3}; + auto delete_predicate = std::make_shared(deleted_rows); + delete_predicate->add_child(VSlotRef::create_shared(2, 2, -1, std::make_shared(), + format::ROW_POSITION_COLUMN_NAME)); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID)}; + request->non_predicate_columns = {field_projection(0)}; + request->local_positions = { + {format::LocalColumnId(0), format::LocalIndex(0)}, + {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)}, + }; + request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(delete_predicate))); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 3); + + const auto& id_column = nullable_nested_column(block, 0); + const auto& row_position_column = + assert_cast(*block.get_by_position(2).column); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 3); + EXPECT_EQ(id_column.get_element(2), 5); + EXPECT_EQ(row_position_column.get_element(0), 0); + EXPECT_EQ(row_position_column.get_element(1), 2); + EXPECT_EQ(row_position_column.get_element(2), 4); +} + +TEST_F(NewParquetReaderTest, QueryPredicateAndDeletePredicateFilterRowPositions) { + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + Block block = build_file_block_with_row_position(schema); + + static const std::vector deleted_rows {3}; + auto delete_predicate = std::make_shared(deleted_rows); + delete_predicate->add_child(VSlotRef::create_shared(2, 2, -1, std::make_shared(), + format::ROW_POSITION_COLUMN_NAME)); + + auto request = std::make_shared(); + request->predicate_columns = {field_projection(0), + field_projection(format::ROW_POSITION_COLUMN_ID)}; + request->non_predicate_columns = {}; + request->local_positions = { + {format::LocalColumnId(0), format::LocalIndex(0)}, + {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)}, + }; + request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2)); + request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(delete_predicate))); + ASSERT_TRUE(reader->open(request).ok()); + + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_FALSE(eof); + ASSERT_EQ(rows, 2); + + const auto& id_column = nullable_nested_column(block, 0); + const auto& row_position_column = + assert_cast(*block.get_by_position(2).column); + EXPECT_EQ(id_column.get_element(0), 3); + EXPECT_EQ(id_column.get_element(1), 5); + EXPECT_EQ(row_position_column.get_element(0), 2); + EXPECT_EQ(row_position_column.get_element(1), 4); +} + +TEST_F(NewParquetReaderTest, RowPositionReaderUsesFileLocalPositionsForScanRange) { + write_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + + const std::vector> expected_ids = {{1, 2}, {3, 4}, {5}}; + const std::vector> expected_row_positions = {{0, 1}, {2, 3}, {4}}; + for (int row_group_idx = 0; row_group_idx < 3; ++row_group_idx) { + const auto [range_start_offset, range_size] = + row_group_mid_range(_file_path, row_group_idx); + auto reader = create_reader(range_start_offset, range_size); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID), + field_projection(0)}; + request->local_positions = { + {format::LocalColumnId(0), format::LocalIndex(0)}, + {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)}, + }; + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector row_positions; + bool eof = false; + while (!eof) { + Block block = build_file_block_with_row_position(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = nullable_nested_column(block, 0); + const auto& row_position_column = + assert_cast(*block.get_by_position(2).column); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + row_positions.push_back(row_position_column.get_element(row)); + } + } + + EXPECT_EQ(ids, expected_ids[row_group_idx]); + EXPECT_EQ(row_positions, expected_row_positions[row_group_idx]); + } +} + +} // namespace +} // namespace doris diff --git a/be/test/format_v2/parquet/parquet_scan_test.cpp b/be/test/format_v2/parquet/parquet_scan_test.cpp new file mode 100644 index 00000000000000..3b381c3158fd45 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_scan_test.cpp @@ -0,0 +1,804 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_scan.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/config.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/field.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/parquet_reader.h" +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Types_types.h" +#include "io/io_common.h" +#include "runtime/runtime_state.h" +#include "storage/predicate/predicate_creator.h" +#include "storage/utils.h" + +namespace doris { +namespace { + +format::LocalColumnIndex field_projection(int32_t column_id) { + return format::LocalColumnIndex {.index = column_id}; +} + +const ColumnInt32& int32_data_column(const IColumn& column) { + if (const auto* nullable_column = check_and_get_column(&column)) { + return assert_cast(nullable_column->get_nested_column()); + } + return assert_cast(column); +} + +const ColumnString& string_data_column(const IColumn& column) { + if (const auto* nullable_column = check_and_get_column(&column)) { + return assert_cast(nullable_column->get_nested_column()); + } + return assert_cast(column); +} + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +std::shared_ptr build_int32_array(const std::vector& values) { + arrow::Int32Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_struct_array(const std::vector& ids, + const std::vector& names) { + auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false), + arrow::field("name", arrow::utf8(), false)}); + std::vector> field_builders; + field_builders.push_back(std::shared_ptr( + std::make_unique().release())); + field_builders.push_back(std::shared_ptr( + std::make_unique().release())); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* id_builder = assert_cast(builder.field_builder(0)); + auto* name_builder = assert_cast(builder.field_builder(1)); + for (size_t row = 0; row < ids.size(); ++row) { + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(id_builder->Append(ids[row]).ok()); + EXPECT_TRUE(name_builder->Append(names[row]).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_list_array() { + auto value_builder = std::make_unique(); + arrow::ListBuilder builder(arrow::default_memory_pool(), std::move(value_builder)); + auto* int_builder = assert_cast(builder.value_builder()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(int_builder->Append(1).ok()); + EXPECT_TRUE(int_builder->Append(2).ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(int_builder->Append(3).ok()); + EXPECT_TRUE(builder.Append().ok()); + return finish_array(&builder); +} + +void write_table(const std::string& file_path, const std::shared_ptr& table, + int64_t row_group_size, bool enable_dictionary = false, + bool enable_page_index = false, bool enable_statistics = true) { + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + if (enable_dictionary) { + builder.enable_dictionary(); + } else { + builder.disable_dictionary(); + } + if (enable_page_index) { + builder.enable_write_page_index(); + builder.write_batch_size(8); + builder.data_pagesize(10); + } + if (!enable_statistics) { + builder.disable_statistics(); + } + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + row_group_size, builder.build())); +} + +void write_int_pair_parquet_file(const std::string& file_path, int64_t row_group_size = 2, + bool enable_statistics = true) { + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("score", arrow::int32(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array({1, 2, 3, 4, 5, 6}), + build_int32_array({10, 20, 30, 40, 50, 60})}); + write_table(file_path, table, row_group_size, false, false, enable_statistics); +} + +void write_struct_parquet_file(const std::string& file_path) { + auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false), + arrow::field("name", arrow::utf8(), false)}); + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make( + schema, {build_struct_array({1, 2, 10, 11}, {"one", "two", "ten", "eleven"})}); + write_table(file_path, table, 2); +} + +void write_list_parquet_file(const std::string& file_path) { + auto schema = arrow::schema({ + arrow::field("xs", arrow::list(arrow::int32()), false), + }); + auto table = arrow::Table::Make(schema, {build_list_array()}); + write_table(file_path, table, 2); +} + +void write_page_index_parquet_file(const std::string& file_path) { + std::vector ids(128); + std::iota(ids.begin(), ids.end(), 0); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids)}); + write_table(file_path, table, ids.size(), false, true); +} + +void write_page_index_pair_parquet_file(const std::string& file_path) { + std::vector ids(128); + std::iota(ids.begin(), ids.end(), 0); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("score", arrow::int32(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(ids)}); + write_table(file_path, table, ids.size(), false, true); +} + +int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) { + return column_metadata.has_dictionary_page() + ? static_cast(column_metadata.dictionary_page_offset()) + : static_cast(column_metadata.data_page_offset()); +} + +std::pair row_group_mid_range(const std::string& file_path, int row_group_idx) { + auto reader = ::parquet::ParquetFileReader::OpenFile(file_path, false); + auto metadata = reader->metadata(); + auto row_group_metadata = metadata->RowGroup(row_group_idx); + auto first_column = row_group_metadata->ColumnChunk(0); + auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1); + const int64_t row_group_start_offset = parquet_column_start_offset(*first_column); + const int64_t row_group_end_offset = + parquet_column_start_offset(*last_column) + last_column->total_compressed_size(); + const int64_t row_group_mid_offset = + row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2; + return {row_group_mid_offset, 1}; +} + +Block build_file_block(const std::vector& schema) { + Block block; + for (const auto& field : schema) { + block.insert({field.type->create_column(), field.type, field.name}); + } + return block; +} + +GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) { + const auto ref = column.get_data_at(row); + EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2)); + GlobalRowLoacationV2 location(0, 0, 0, 0); + std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2)); + return location; +} + +void use_schema_order_positions(format::FileScanRequest* request, + const std::vector& schema) { + DORIS_CHECK(request != nullptr); + for (size_t idx = 0; idx < schema.size(); ++idx) { + request->local_positions.emplace(format::LocalColumnId(schema[idx].local_id), + format::LocalIndex(idx)); + } +} + +std::vector> build_file_schema( + const ::parquet::ParquetFileReader& reader) { + std::vector> file_schema; + auto schema_descriptor = reader.metadata()->schema(); + EXPECT_NE(schema_descriptor, nullptr); + EXPECT_TRUE( + format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok()); + return file_schema; +} + +format::FileColumnPredicateFilter int32_filter(int32_t column_id, std::string column_name, + const DataTypePtr& type, + PredicateType predicate_type, int32_t value) { + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(column_id); + switch (predicate_type) { + case PredicateType::GE: + column_filter.predicates.push_back(create_comparison_predicate( + column_id, column_name, type, Field::create_field(value), false)); + break; + case PredicateType::GT: + column_filter.predicates.push_back(create_comparison_predicate( + column_id, column_name, type, Field::create_field(value), false)); + break; + case PredicateType::LT: + column_filter.predicates.push_back(create_comparison_predicate( + column_id, column_name, type, Field::create_field(value), false)); + break; + default: + DORIS_CHECK(false); + } + return column_filter; +} + +int64_t count_range_rows(const std::vector& ranges) { + int64_t rows = 0; + for (const auto& range : ranges) { + rows += range.length; + } + return rows; +} + +class ParquetScanTest : public testing::Test { +protected: + void SetUp() override { + _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_parquet_scan_test"; + std::filesystem::remove_all(_test_dir); + std::filesystem::create_directories(_test_dir); + _file_path = (_test_dir / "scan.parquet").string(); + } + + void TearDown() override { std::filesystem::remove_all(_test_dir); } + + std::unique_ptr create_reader( + int64_t range_start_offset = 0, int64_t range_size = -1, + RuntimeProfile* profile = nullptr, + std::optional global_rowid_context = std::nullopt) const { + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto file_description = std::make_unique(); + file_description->path = _file_path; + file_description->file_size = static_cast(std::filesystem::file_size(_file_path)); + file_description->range_start_offset = range_start_offset; + file_description->range_size = range_size; + return std::make_unique( + system_properties, file_description, nullptr, profile, global_rowid_context); + } + + std::shared_ptr open_all_row_groups( + format::parquet::ParquetReader* reader) { + auto request = std::make_shared(); + EXPECT_TRUE(reader->open(request).ok()); + return request; + } + + std::filesystem::path _test_dir; + std::string _file_path; +}; + +TEST_F(ParquetScanTest, PlanRowGroupsAppliesScanRangeBeforeStatistics) { + write_int_pair_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + auto file_schema = build_file_schema(*parquet_file_reader); + + format::FileScanRequest request; + request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 5)); + + const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 1); + format::parquet::ParquetScanRange scan_range; + scan_range.start_offset = range_start_offset; + scan_range.size = range_size; + scan_range.file_size = static_cast(std::filesystem::file_size(_file_path)); + + format::parquet::RowGroupScanPlan plan; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + EXPECT_TRUE(plan.row_groups.empty()); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 3); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 0); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 1); + EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 2); +} + +TEST_F(ParquetScanTest, PlanRowGroupsPreservesFirstFileRowAcrossPrunedRowGroups) { + write_int_pair_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + auto file_schema = build_file_schema(*parquet_file_reader); + + format::FileScanRequest request; + request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 5)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + ASSERT_EQ(plan.row_groups.size(), 1); + EXPECT_EQ(plan.row_groups[0].row_group_id, 2); + EXPECT_EQ(plan.row_groups[0].first_file_row, 4); + EXPECT_EQ(plan.row_groups[0].row_group_rows, 2); + ASSERT_EQ(plan.row_groups[0].selected_ranges.size(), 1); + EXPECT_EQ(plan.row_groups[0].selected_ranges[0].start, 0); + EXPECT_EQ(plan.row_groups[0].selected_ranges[0].length, 2); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 2); + EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 4); +} + +TEST_F(ParquetScanTest, PlanRowGroupsSelectsAllRowGroupsWithoutFilters) { + write_int_pair_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + auto file_schema = build_file_schema(*parquet_file_reader); + + format::FileScanRequest request; + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + + ASSERT_EQ(plan.row_groups.size(), 3); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 3); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 3); + for (size_t row_group_idx = 0; row_group_idx < plan.row_groups.size(); ++row_group_idx) { + EXPECT_EQ(plan.row_groups[row_group_idx].row_group_id, row_group_idx); + EXPECT_EQ(plan.row_groups[row_group_idx].first_file_row, + static_cast(row_group_idx * 2)); + ASSERT_EQ(plan.row_groups[row_group_idx].selected_ranges.size(), 1); + EXPECT_EQ(plan.row_groups[row_group_idx].selected_ranges[0].start, 0); + EXPECT_EQ(plan.row_groups[row_group_idx].selected_ranges[0].length, 2); + EXPECT_TRUE(plan.row_groups[row_group_idx].page_skip_plans.empty()); + } +} + +TEST_F(ParquetScanTest, PageIndexIntersectsMultipleFiltersAndBuildsSkipPlan) { + write_page_index_pair_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1); + auto file_schema = build_file_schema(*parquet_file_reader); + + format::FileScanRequest single_filter_request; + single_filter_request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32)); + format::parquet::RowGroupScanPlan single_filter_plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups( + *parquet_file_reader->metadata(), parquet_file_reader.get(), file_schema, + single_filter_request, scan_range, false, &single_filter_plan) + .ok()); + ASSERT_EQ(single_filter_plan.row_groups.size(), 1); + const int64_t single_filter_rows = + count_range_rows(single_filter_plan.row_groups[0].selected_ranges); + + format::FileScanRequest intersect_request; + intersect_request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32)); + intersect_request.column_predicate_filters.push_back( + int32_filter(1, "score", file_schema[1]->type, PredicateType::LT, 96)); + format::parquet::RowGroupScanPlan intersect_plan; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups( + *parquet_file_reader->metadata(), parquet_file_reader.get(), file_schema, + intersect_request, scan_range, false, &intersect_plan) + .ok()); + ASSERT_EQ(intersect_plan.row_groups.size(), 1); + ASSERT_FALSE(intersect_plan.row_groups[0].selected_ranges.empty()); + const int64_t intersect_rows = count_range_rows(intersect_plan.row_groups[0].selected_ranges); + EXPECT_GT(single_filter_rows, intersect_rows); + EXPECT_GT(intersect_plan.row_groups[0].selected_ranges.front().start, 0); + const auto& last_range = intersect_plan.row_groups[0].selected_ranges.back(); + EXPECT_LT(last_range.start + last_range.length, 128); + EXPECT_GT(intersect_plan.pruning_stats.filtered_page_rows, 0); + EXPECT_EQ(intersect_plan.pruning_stats.selected_row_ranges, + intersect_plan.row_groups[0].selected_ranges.size()); + + auto id_skip_plan = intersect_plan.row_groups[0].page_skip_plans.find(0); + ASSERT_NE(id_skip_plan, intersect_plan.row_groups[0].page_skip_plans.end()); + EXPECT_EQ(id_skip_plan->second.leaf_column_id, 0); + EXPECT_FALSE(id_skip_plan->second.empty()); + auto score_skip_plan = intersect_plan.row_groups[0].page_skip_plans.find(1); + ASSERT_NE(score_skip_plan, intersect_plan.row_groups[0].page_skip_plans.end()); + EXPECT_EQ(score_skip_plan->second.leaf_column_id, 1); + EXPECT_FALSE(score_skip_plan->second.empty()); +} + +TEST_F(ParquetScanTest, PageIndexCanFullyFilterRowGroupAfterRangeIntersection) { + write_page_index_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1); + auto file_schema = build_file_schema(*parquet_file_reader); + + format::FileScanRequest request; + request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32)); + request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::LT, 32)); + + format::parquet::RowGroupScanPlan plan; + format::parquet::ParquetScanRange scan_range; + ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(), + parquet_file_reader.get(), file_schema, + request, scan_range, false, &plan) + .ok()); + EXPECT_TRUE(plan.row_groups.empty()); + EXPECT_EQ(plan.pruning_stats.total_row_groups, 1); + EXPECT_EQ(plan.pruning_stats.selected_row_groups, 0); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 0); + EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 1); + EXPECT_EQ(plan.pruning_stats.filtered_page_rows, 128); +} + +TEST_F(ParquetScanTest, PageIndexFullRangeWhenDisabledOrUnavailable) { + write_page_index_parquet_file(_file_path); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + auto file_schema = build_file_schema(*parquet_file_reader); + + format::FileScanRequest request; + request.column_predicate_filters.push_back( + int32_filter(0, "id", file_schema[0]->type, PredicateType::GT, 63)); + + const bool old_enable_page_index = config::enable_parquet_page_index; + config::enable_parquet_page_index = false; + std::vector selected_ranges; + std::map page_skip_plans; + format::parquet::ParquetPruningStats pruning_stats; + ASSERT_TRUE(format::parquet::select_row_group_ranges_by_page_index( + parquet_file_reader.get(), file_schema, request, 0, 128, &selected_ranges, + &page_skip_plans, &pruning_stats) + .ok()); + config::enable_parquet_page_index = old_enable_page_index; + ASSERT_EQ(selected_ranges.size(), 1); + EXPECT_EQ(selected_ranges[0].start, 0); + EXPECT_EQ(selected_ranges[0].length, 128); + EXPECT_TRUE(page_skip_plans.empty()); + EXPECT_EQ(pruning_stats.page_index_read_calls, 0); + + write_int_pair_parquet_file(_file_path, 6); + auto no_index_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + auto no_index_schema = build_file_schema(*no_index_reader); + format::FileScanRequest no_index_request; + no_index_request.column_predicate_filters.push_back( + int32_filter(0, "id", no_index_schema[0]->type, PredicateType::GT, 3)); + selected_ranges.clear(); + page_skip_plans.clear(); + pruning_stats = {}; + ASSERT_TRUE(format::parquet::select_row_group_ranges_by_page_index( + no_index_reader.get(), no_index_schema, no_index_request, 0, 6, + &selected_ranges, &page_skip_plans, &pruning_stats) + .ok()); + ASSERT_EQ(selected_ranges.size(), 1); + EXPECT_EQ(selected_ranges[0].start, 0); + EXPECT_EQ(selected_ranges[0].length, 6); + EXPECT_TRUE(page_skip_plans.empty()); +} + +TEST_F(ParquetScanTest, AggregateCountAndMinMaxUseAllSelectedRowGroups) { + write_int_pair_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + open_all_row_groups(reader.get()); + + format::FileAggregateResult count_result; + format::FileAggregateRequest count_request; + count_request.agg_type = TPushAggOp::COUNT; + ASSERT_TRUE(reader->get_aggregate_result(count_request, &count_result).ok()); + EXPECT_EQ(count_result.count, 6); + EXPECT_TRUE(count_result.columns.empty()); + + format::FileAggregateResult minmax_result; + format::FileAggregateRequest minmax_request; + minmax_request.agg_type = TPushAggOp::MINMAX; + minmax_request.columns.push_back({.projection = field_projection(0)}); + minmax_request.columns.push_back({.projection = field_projection(1)}); + ASSERT_TRUE(reader->get_aggregate_result(minmax_request, &minmax_result).ok()); + EXPECT_EQ(minmax_result.count, 6); + ASSERT_EQ(minmax_result.columns.size(), 2); + EXPECT_TRUE(minmax_result.columns[0].has_min); + EXPECT_TRUE(minmax_result.columns[0].has_max); + EXPECT_EQ(minmax_result.columns[0].min_value.get(), 1); + EXPECT_EQ(minmax_result.columns[0].max_value.get(), 6); + EXPECT_EQ(minmax_result.columns[1].min_value.get(), 10); + EXPECT_EQ(minmax_result.columns[1].max_value.get(), 60); +} + +TEST_F(ParquetScanTest, AggregateRespectsStatisticsPrunedRowGroups) { + write_int_pair_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(5), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + format::FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::MINMAX; + aggregate_request.columns.push_back({.projection = field_projection(0)}); + format::FileAggregateResult result; + ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok()); + EXPECT_EQ(result.count, 2); + ASSERT_EQ(result.columns.size(), 1); + EXPECT_EQ(result.columns[0].min_value.get(), 5); + EXPECT_EQ(result.columns[0].max_value.get(), 6); +} + +TEST_F(ParquetScanTest, AggregateCountKeepsRowGroupRowsAfterPageIndexPruning) { + write_page_index_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(63), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + format::FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::COUNT; + format::FileAggregateResult result; + ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok()); + EXPECT_EQ(result.count, 128); +} + +TEST_F(ParquetScanTest, AggregateMinMaxSupportsNestedSingleLeafProjection) { + write_struct_parquet_file(_file_path); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + open_all_row_groups(reader.get()); + + format::LocalColumnIndex nested_id = format::LocalColumnIndex::partial_local(0); + nested_id.children.push_back(field_projection(0)); + format::FileAggregateRequest aggregate_request; + aggregate_request.agg_type = TPushAggOp::MINMAX; + aggregate_request.columns.push_back({.projection = nested_id}); + format::FileAggregateResult result; + ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok()); + EXPECT_EQ(result.count, 4); + ASSERT_EQ(result.columns.size(), 1); + EXPECT_EQ(result.columns[0].min_value.get(), 1); + EXPECT_EQ(result.columns[0].max_value.get(), 11); +} + +TEST_F(ParquetScanTest, AggregateRejectsRepeatedMissingStatisticsAndInvalidRequests) { + write_list_parquet_file(_file_path); + auto repeated_reader = create_reader(); + RuntimeState repeated_state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(repeated_reader->init(&repeated_state).ok()); + open_all_row_groups(repeated_reader.get()); + + format::FileAggregateRequest repeated_request; + repeated_request.agg_type = TPushAggOp::MINMAX; + repeated_request.columns.push_back({.projection = field_projection(0)}); + format::FileAggregateResult repeated_result; + EXPECT_FALSE(repeated_reader->get_aggregate_result(repeated_request, &repeated_result).ok()); + + write_int_pair_parquet_file(_file_path, 2, false); + auto no_stats_reader = create_reader(); + RuntimeState no_stats_state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(no_stats_reader->init(&no_stats_state).ok()); + open_all_row_groups(no_stats_reader.get()); + format::FileAggregateRequest no_stats_request; + no_stats_request.agg_type = TPushAggOp::MINMAX; + no_stats_request.columns.push_back({.projection = field_projection(0)}); + format::FileAggregateResult no_stats_result; + EXPECT_FALSE(no_stats_reader->get_aggregate_result(no_stats_request, &no_stats_result).ok()); + + format::FileAggregateRequest invalid_type_request; + invalid_type_request.agg_type = TPushAggOp::MIX; + format::FileAggregateResult invalid_type_result; + EXPECT_FALSE( + no_stats_reader->get_aggregate_result(invalid_type_request, &invalid_type_result).ok()); + + format::FileAggregateRequest invalid_column_request; + invalid_column_request.agg_type = TPushAggOp::MINMAX; + invalid_column_request.columns.push_back({.projection = field_projection(100)}); + format::FileAggregateResult invalid_column_result; + EXPECT_FALSE( + no_stats_reader->get_aggregate_result(invalid_column_request, &invalid_column_result) + .ok()); +} + +TEST_F(ParquetScanTest, GlobalRowIdUsesFileLocalPositionForScanRange) { + write_int_pair_parquet_file(_file_path, 2); + auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); + ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3); + const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 1); + format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42}; + auto reader = create_reader(range_start_offset, range_size, nullptr, context); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0), + field_projection(format::GLOBAL_ROWID_COLUMN_ID)}; + use_schema_order_positions(request.get(), schema); + ASSERT_TRUE(reader->open(request).ok()); + + std::vector ids; + std::vector row_ids; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + if (rows == 0) { + continue; + } + const auto& id_column = int32_data_column(*block.get_by_position(0).column); + const auto& rowid_column = string_data_column(*block.get_by_position(2).column); + for (size_t row = 0; row < rows; ++row) { + ids.push_back(id_column.get_element(row)); + const auto location = decode_rowid(rowid_column, row); + EXPECT_EQ(location.version, context.version); + EXPECT_EQ(location.backend_id, context.backend_id); + EXPECT_EQ(location.file_id, context.file_id); + row_ids.push_back(location.row_id); + } + } + + EXPECT_EQ(ids, std::vector({3, 4})); + EXPECT_EQ(row_ids, std::vector({2, 3})); +} + +TEST_F(ParquetScanTest, EmptyScanPlanReturnsEofWithoutReadingColumns) { + write_int_pair_parquet_file(_file_path, 2); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(100), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + Block block = build_file_block(schema); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_EQ(rows, 0); + EXPECT_TRUE(eof); +} + +TEST_F(ParquetScanTest, NoRequestedColumnsReturnsRowsOnlyAcrossRowGroups) { + write_int_pair_parquet_file(_file_path, 2); + auto reader = create_reader(); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + auto request = std::make_shared(); + ASSERT_TRUE(reader->open(request).ok()); + + size_t total_rows = 0; + bool eof = false; + while (!eof) { + Block block; + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_EQ(block.columns(), 0); + total_rows += rows; + } + EXPECT_EQ(total_rows, 6); +} + +TEST_F(ParquetScanTest, ProfileCountersReflectPageIndexAndRangeGapPruning) { + write_page_index_parquet_file(_file_path); + RuntimeProfile profile("profile"); + auto reader = create_reader(0, -1, &profile); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + request->non_predicate_columns = {field_projection(0)}; + use_schema_order_positions(request.get(), schema); + format::FileColumnPredicateFilter column_filter; + column_filter.file_column_id = format::LocalColumnId(0); + column_filter.predicates.push_back(create_comparison_predicate( + 0, "id", schema[0].type, Field::create_field(63), false)); + request->column_predicate_filters.push_back(std::move(column_filter)); + ASSERT_TRUE(reader->open(request).ok()); + + size_t total_rows = 0; + bool eof = false; + while (!eof) { + Block block = build_file_block(schema); + size_t rows = 0; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + total_rows += rows; + } + + EXPECT_EQ(total_rows, 64); + ASSERT_NE(profile.get_counter("RowGroupsTotalNum"), nullptr); + ASSERT_NE(profile.get_counter("RowGroupsReadNum"), nullptr); + ASSERT_NE(profile.get_counter("FilteredRowsByPage"), nullptr); + ASSERT_NE(profile.get_counter("SelectedRowRanges"), nullptr); + ASSERT_NE(profile.get_counter("PageIndexReadCalls"), nullptr); + ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr); + ASSERT_NE(profile.get_counter("RangeGapSkippedRows"), nullptr); + EXPECT_EQ(profile.get_counter("RowGroupsTotalNum")->value(), 1); + EXPECT_EQ(profile.get_counter("RowGroupsReadNum")->value(), 1); + EXPECT_GT(profile.get_counter("FilteredRowsByPage")->value(), 0); + EXPECT_GT(profile.get_counter("SelectedRowRanges")->value(), 0); + EXPECT_GT(profile.get_counter("PageIndexReadCalls")->value(), 0); + EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), 64); + EXPECT_GT(profile.get_counter("RangeGapSkippedRows")->value(), 0); +} + +} // namespace +} // namespace doris diff --git a/be/test/format_v2/parquet/parquet_schema_test.cpp b/be/test/format_v2/parquet/parquet_schema_test.cpp new file mode 100644 index 00000000000000..e620ed718efbf2 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_schema_test.cpp @@ -0,0 +1,527 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include + +#include "core/assert_cast.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_struct.h" +#include "core/data_type/primitive_type.h" +#include "format_v2/parquet/parquet_column_schema.h" + +namespace doris::format::parquet { +namespace { + +std::vector> build_fields( + const std::vector<::parquet::schema::NodePtr>& nodes) { + auto schema = + ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, nodes); + ::parquet::SchemaDescriptor descriptor; + descriptor.Init(schema); + std::vector> fields; + EXPECT_TRUE(build_parquet_column_schema(descriptor, &fields).ok()); + return fields; +} + +Status build_status(const std::vector<::parquet::schema::NodePtr>& nodes) { + auto schema = + ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, nodes); + ::parquet::SchemaDescriptor descriptor; + descriptor.Init(schema); + std::vector> fields; + return build_parquet_column_schema(descriptor, &fields); +} + +} // namespace + +TEST(ParquetSchemaTest, PrimitiveStateAndFieldIdArePreserved) { + const auto fields = build_fields({ + ::parquet::schema::PrimitiveNode::Make("required_i32", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT32), + ::parquet::schema::PrimitiveNode::Make("optional_i64", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT64, + ::parquet::ConvertedType::NONE, -1, -1, -1, 42), + }); + + ASSERT_EQ(fields.size(), 2); + EXPECT_EQ(fields[0]->local_id, 0); + EXPECT_EQ(fields[0]->name, "required_i32"); + EXPECT_EQ(fields[0]->kind, ParquetColumnSchemaKind::PRIMITIVE); + EXPECT_EQ(fields[0]->leaf_column_id, 0); + EXPECT_EQ(fields[0]->nullable_definition_level, 0); + EXPECT_FALSE(fields[0]->type->is_nullable()); + + EXPECT_EQ(fields[1]->local_id, 1); + EXPECT_EQ(fields[1]->parquet_field_id, 42); + EXPECT_EQ(fields[1]->leaf_column_id, 1); + EXPECT_EQ(fields[1]->nullable_definition_level, 1); + EXPECT_TRUE(fields[1]->type->is_nullable()); +} + +TEST(ParquetSchemaTest, PrimitiveTypeDescriptorCoversLogicalConvertedAndPhysicalFallback) { + const auto fields = build_fields({ + ::parquet::schema::PrimitiveNode::Make( + "ts", ::parquet::Repetition::OPTIONAL, + ::parquet::LogicalType::Timestamp(false, + ::parquet::LogicalType::TimeUnit::MICROS), + ::parquet::Type::INT64), + ::parquet::schema::PrimitiveNode::Make("i8", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT32, + ::parquet::ConvertedType::INT_8), + ::parquet::schema::PrimitiveNode::Make("plain", ::parquet::Repetition::REQUIRED, + ::parquet::Type::DOUBLE), + }); + + ASSERT_EQ(fields.size(), 3); + EXPECT_EQ(remove_nullable(fields[0]->type)->get_primitive_type(), TYPE_DATETIMEV2); + EXPECT_EQ(fields[0]->type_descriptor.time_unit, ParquetTimeUnit::MICROS); + EXPECT_EQ(fields[0]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS); + EXPECT_TRUE(fields[0]->type_descriptor.is_timestamp); + EXPECT_FALSE(fields[0]->type_descriptor.timestamp_is_adjusted_to_utc); + + EXPECT_EQ(remove_nullable(fields[1]->type)->get_primitive_type(), TYPE_TINYINT); + EXPECT_EQ(fields[1]->type_descriptor.integer_bit_width, 8); + EXPECT_FALSE(fields[1]->type_descriptor.is_unsigned_integer); + + EXPECT_EQ(remove_nullable(fields[2]->type)->get_primitive_type(), TYPE_DOUBLE); + EXPECT_EQ(fields[2]->type_descriptor.physical_type, ::parquet::Type::DOUBLE); + EXPECT_EQ(fields[2]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::NONE); +} + +TEST(ParquetSchemaTest, StructMakesDataTypeChildrenNullableAndPropagatesLevels) { + const auto fields = build_fields({::parquet::schema::GroupNode::Make( + "s", ::parquet::Repetition::OPTIONAL, + { + ::parquet::schema::PrimitiveNode::Make("a", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT32), + ::parquet::schema::PrimitiveNode::Make("b", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::BYTE_ARRAY, + ::parquet::ConvertedType::UTF8), + })}); + + ASSERT_EQ(fields.size(), 1); + const auto& struct_schema = *fields[0]; + EXPECT_EQ(struct_schema.kind, ParquetColumnSchemaKind::STRUCT); + EXPECT_EQ(struct_schema.nullable_definition_level, 1); + ASSERT_EQ(struct_schema.children.size(), 2); + EXPECT_EQ(struct_schema.children[0]->definition_level, 1); + EXPECT_EQ(struct_schema.children[1]->definition_level, 2); + EXPECT_EQ(struct_schema.max_definition_level, 2); + + const auto& struct_type = + assert_cast(*remove_nullable(struct_schema.type)); + ASSERT_EQ(struct_type.get_elements().size(), 2); + EXPECT_TRUE(struct_type.get_elements()[0]->is_nullable()); + EXPECT_TRUE(struct_type.get_elements()[1]->is_nullable()); +} + +TEST(ParquetSchemaTest, ListCompatibilityRulesAndLevels) { + const auto standard_list = ::parquet::schema::GroupNode::Make( + "xs", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)})}, + ::parquet::ConvertedType::LIST); + const auto structural_array = ::parquet::schema::GroupNode::Make( + "ys", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "array", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make( + "value", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64)})}, + ::parquet::ConvertedType::LIST); + + const auto fields = build_fields({standard_list, structural_array}); + ASSERT_EQ(fields.size(), 2); + + const auto& xs = *fields[0]; + EXPECT_EQ(xs.kind, ParquetColumnSchemaKind::LIST); + EXPECT_EQ(xs.definition_level, 2); + EXPECT_EQ(xs.repetition_level, 1); + ASSERT_EQ(xs.children.size(), 1); + EXPECT_EQ(xs.children[0]->name, "element"); + EXPECT_EQ(xs.children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE); + EXPECT_TRUE(xs.children[0]->type->is_nullable()); + const auto& xs_type = assert_cast(*remove_nullable(xs.type)); + EXPECT_TRUE(xs_type.get_nested_type()->is_nullable()); + + const auto& ys = *fields[1]; + EXPECT_EQ(ys.kind, ParquetColumnSchemaKind::LIST); + ASSERT_EQ(ys.children.size(), 1); + EXPECT_EQ(ys.children[0]->kind, ParquetColumnSchemaKind::STRUCT); + EXPECT_EQ(remove_nullable(ys.children[0]->type)->get_primitive_type(), TYPE_STRUCT); +} + +TEST(ParquetSchemaTest, LegacyListElementResolutionRulesArePreserved) { + const auto two_level_list = ::parquet::schema::GroupNode::Make( + "two_level", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::REPEATED, + ::parquet::Type::INT32)}, + ::parquet::ConvertedType::LIST); + const auto tuple_list = ::parquet::schema::GroupNode::Make( + "tuple_list", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "tuple_list_tuple", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make( + "value", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64)})}, + ::parquet::ConvertedType::LIST); + const auto multi_field_list = ::parquet::schema::GroupNode::Make( + "records", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("id", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT32), + ::parquet::schema::PrimitiveNode::Make("name", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::BYTE_ARRAY, + ::parquet::ConvertedType::UTF8)})}, + ::parquet::ConvertedType::LIST); + const auto fields = build_fields({two_level_list, tuple_list, multi_field_list}); + ASSERT_EQ(fields.size(), 3); + + const auto& two_level = *fields[0]; + EXPECT_EQ(two_level.kind, ParquetColumnSchemaKind::LIST); + EXPECT_EQ(two_level.definition_level, 2); + EXPECT_EQ(two_level.repetition_level, 1); + ASSERT_EQ(two_level.children.size(), 1); + EXPECT_EQ(two_level.children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE); + EXPECT_EQ(two_level.children[0]->name, "element"); + EXPECT_EQ(remove_nullable(two_level.children[0]->type)->get_primitive_type(), TYPE_INT); + + const auto& tuple = *fields[1]; + ASSERT_EQ(tuple.children.size(), 1); + EXPECT_EQ(tuple.children[0]->kind, ParquetColumnSchemaKind::STRUCT); + EXPECT_EQ(tuple.children[0]->name, "element"); + ASSERT_EQ(tuple.children[0]->children.size(), 1); + EXPECT_EQ(tuple.children[0]->children[0]->name, "value"); + + const auto& multi_field = *fields[2]; + ASSERT_EQ(multi_field.children.size(), 1); + EXPECT_EQ(multi_field.children[0]->kind, ParquetColumnSchemaKind::STRUCT); + ASSERT_EQ(multi_field.children[0]->children.size(), 2); + EXPECT_EQ(multi_field.children[0]->children[0]->name, "id"); + EXPECT_EQ(multi_field.children[0]->children[1]->name, "name"); +} + +TEST(ParquetSchemaTest, NestedRepeatedInsideListElementIsWrappedOnce) { + const auto list_with_repeated_child = ::parquet::schema::GroupNode::Make( + "outer", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make( + "items", ::parquet::Repetition::REPEATED, ::parquet::Type::INT32)})}, + ::parquet::ConvertedType::LIST); + + const auto fields = build_fields({list_with_repeated_child}); + ASSERT_EQ(fields.size(), 1); + const auto& outer = *fields[0]; + EXPECT_EQ(outer.kind, ParquetColumnSchemaKind::LIST); + ASSERT_EQ(outer.children.size(), 1); + const auto& element = *outer.children[0]; + EXPECT_EQ(element.kind, ParquetColumnSchemaKind::STRUCT); + ASSERT_EQ(element.children.size(), 1); + EXPECT_EQ(element.children[0]->kind, ParquetColumnSchemaKind::LIST); + EXPECT_EQ(element.children[0]->name, "items"); + ASSERT_EQ(element.children[0]->children.size(), 1); + EXPECT_EQ(element.children[0]->children[0]->name, "element"); +} + +TEST(ParquetSchemaTest, ListWrapperWithLogicalAnnotationIsPreservedAsElement) { + const auto annotated_repeated_group = ::parquet::schema::GroupNode::Make( + "xs", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make( + "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)}, + ::parquet::ConvertedType::LIST)}, + ::parquet::ConvertedType::LIST); + + EXPECT_FALSE(build_status({annotated_repeated_group}).ok()); + + const auto nested_list_wrapper = ::parquet::schema::GroupNode::Make( + "xs", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("value", + ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)})}, + ::parquet::ConvertedType::LIST)}, + ::parquet::ConvertedType::LIST); + + const auto fields = build_fields({nested_list_wrapper}); + ASSERT_EQ(fields.size(), 1); + const auto& xs = *fields[0]; + EXPECT_EQ(xs.kind, ParquetColumnSchemaKind::LIST); + ASSERT_EQ(xs.children.size(), 1); + const auto& element = *xs.children[0]; + EXPECT_EQ(element.kind, ParquetColumnSchemaKind::LIST); + EXPECT_EQ(element.name, "element"); + ASSERT_EQ(element.children.size(), 1); + EXPECT_EQ(element.children[0]->name, "element"); + EXPECT_EQ(remove_nullable(element.children[0]->type)->get_primitive_type(), TYPE_INT); +} + +TEST(ParquetSchemaTest, MapWrapperIsFoldedAndOptionalKeyIsAllowed) { + const auto fields = build_fields({::parquet::schema::GroupNode::Make( + "m", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "key_value", ::parquet::Repetition::REPEATED, + { + ::parquet::schema::PrimitiveNode::Make( + "key", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make("value", + ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32), + })}, + ::parquet::ConvertedType::MAP)}); + + ASSERT_EQ(fields.size(), 1); + const auto& map_schema = *fields[0]; + EXPECT_EQ(map_schema.kind, ParquetColumnSchemaKind::MAP); + EXPECT_EQ(map_schema.definition_level, 2); + EXPECT_EQ(map_schema.repetition_level, 1); + ASSERT_EQ(map_schema.children.size(), 2); + EXPECT_EQ(map_schema.children[0]->name, "key"); + EXPECT_EQ(map_schema.children[1]->name, "value"); + EXPECT_TRUE(map_schema.children[0]->type->is_nullable()); + + const auto& map_type = assert_cast(*remove_nullable(map_schema.type)); + EXPECT_TRUE(map_type.get_key_type()->is_nullable()); + EXPECT_TRUE(map_type.get_value_type()->is_nullable()); +} + +TEST(ParquetSchemaTest, StandardMapLevelsAndDataTypesAreBuiltFromEntryContext) { + const auto fields = build_fields({::parquet::schema::GroupNode::Make( + "m", ::parquet::Repetition::REQUIRED, + {::parquet::schema::GroupNode::Make( + "key_value", ::parquet::Repetition::REPEATED, + { + ::parquet::schema::PrimitiveNode::Make( + "key", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make("value", + ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32), + })}, + ::parquet::ConvertedType::MAP)}); + + ASSERT_EQ(fields.size(), 1); + const auto& map_schema = *fields[0]; + EXPECT_FALSE(map_schema.type->is_nullable()); + EXPECT_EQ(map_schema.definition_level, 1); + EXPECT_EQ(map_schema.repetition_level, 1); + EXPECT_EQ(map_schema.repeated_repetition_level, 1); + EXPECT_EQ(map_schema.max_definition_level, 2); + EXPECT_EQ(map_schema.max_repetition_level, 1); + ASSERT_EQ(map_schema.children.size(), 2); + EXPECT_EQ(map_schema.children[0]->definition_level, 1); + EXPECT_EQ(map_schema.children[0]->repetition_level, 1); + EXPECT_EQ(map_schema.children[1]->definition_level, 2); + EXPECT_EQ(map_schema.children[1]->nullable_definition_level, 2); + + const auto& map_type = assert_cast(*remove_nullable(map_schema.type)); + EXPECT_TRUE(map_type.get_key_type()->is_nullable()); + EXPECT_TRUE(map_type.get_value_type()->is_nullable()); +} + +TEST(ParquetSchemaTest, BareRepeatedFieldsAreWrappedAsLists) { + const auto fields = build_fields({ + ::parquet::schema::PrimitiveNode::Make("items", ::parquet::Repetition::REPEATED, + ::parquet::Type::INT32), + ::parquet::schema::GroupNode::Make( + "links", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("url", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::BYTE_ARRAY, + ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make("rank", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)}), + }); + + ASSERT_EQ(fields.size(), 2); + EXPECT_EQ(fields[0]->kind, ParquetColumnSchemaKind::LIST); + ASSERT_EQ(fields[0]->children.size(), 1); + EXPECT_EQ(fields[0]->children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE); + EXPECT_EQ(fields[0]->children[0]->name, "element"); + + EXPECT_EQ(fields[1]->kind, ParquetColumnSchemaKind::LIST); + ASSERT_EQ(fields[1]->children.size(), 1); + EXPECT_EQ(fields[1]->children[0]->kind, ParquetColumnSchemaKind::STRUCT); + EXPECT_EQ(fields[1]->children[0]->name, "element"); +} + +TEST(ParquetSchemaTest, DeepLevelChainPropagatesDefinitionAndRepetitionLevels) { + const auto fields = build_fields({::parquet::schema::GroupNode::Make( + "s", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "inner", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::PrimitiveNode::Make( + "items", ::parquet::Repetition::REPEATED, ::parquet::Type::INT32)})})}); + + ASSERT_EQ(fields.size(), 1); + const auto& s = *fields[0]; + EXPECT_EQ(s.definition_level, 1); + EXPECT_EQ(s.nullable_definition_level, 1); + ASSERT_EQ(s.children.size(), 1); + const auto& inner = *s.children[0]; + EXPECT_EQ(inner.definition_level, 2); + EXPECT_EQ(inner.nullable_definition_level, 2); + ASSERT_EQ(inner.children.size(), 1); + const auto& items = *inner.children[0]; + EXPECT_EQ(items.kind, ParquetColumnSchemaKind::LIST); + EXPECT_EQ(items.definition_level, 3); + EXPECT_EQ(items.repetition_level, 1); + EXPECT_EQ(items.repeated_ancestor_definition_level, 3); + EXPECT_EQ(items.repeated_repetition_level, 1); + EXPECT_EQ(items.max_definition_level, 3); + EXPECT_EQ(items.max_repetition_level, 1); + ASSERT_EQ(items.children.size(), 1); + EXPECT_EQ(items.children[0]->definition_level, 3); + EXPECT_EQ(items.children[0]->repetition_level, 1); +} + +TEST(ParquetSchemaTest, BuildEntryValidatesNullPointerAndEmptyRoot) { + auto empty_root = ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, + ::parquet::schema::NodeVector {}); + ::parquet::SchemaDescriptor descriptor; + descriptor.Init(empty_root); + + EXPECT_FALSE(build_parquet_column_schema(descriptor, nullptr).ok()); + + std::vector> fields; + ASSERT_TRUE(build_parquet_column_schema(descriptor, &fields).ok()); + EXPECT_TRUE(fields.empty()); +} + +TEST(ParquetSchemaTest, RejectInvalidListMapAndUnsupportedTime) { + const auto bad_list = ::parquet::schema::GroupNode::Make( + "bad_list", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)}, + ::parquet::ConvertedType::LIST); + EXPECT_FALSE(build_status({bad_list}).ok()); + + const auto bad_map = ::parquet::schema::GroupNode::Make( + "bad_map", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::PrimitiveNode::Make("entry", ::parquet::Repetition::REPEATED, + ::parquet::Type::INT32)}, + ::parquet::ConvertedType::MAP); + EXPECT_FALSE(build_status({bad_map}).ok()); + + const auto converted_time = ::parquet::schema::PrimitiveNode::Make( + "time_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32, + ::parquet::ConvertedType::TIME_MILLIS); + const auto status = build_status({converted_time}); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Parquet TIME with isAdjustedToUTC=true is not supported"), + std::string::npos); +} + +TEST(ParquetSchemaTest, RejectAdditionalInvalidListAndMapLayouts) { + const auto zero_child_list = ::parquet::schema::GroupNode::Make( + "zero_child_list", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make("list", ::parquet::Repetition::REPEATED, + ::parquet::schema::NodeVector {})}, + ::parquet::ConvertedType::LIST); + EXPECT_FALSE(build_status({zero_child_list}).ok()); + + const auto repeated_list = ::parquet::schema::GroupNode::Make( + "repeated_list", ::parquet::Repetition::REPEATED, + {::parquet::schema::GroupNode::Make( + "list", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)})}, + ::parquet::ConvertedType::LIST); + EXPECT_FALSE(build_status({repeated_list}).ok()); + + const auto map_with_two_fields = ::parquet::schema::GroupNode::Make( + "bad_map", ::parquet::Repetition::OPTIONAL, + { + ::parquet::schema::GroupNode::Make( + "entry1", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make( + "key", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make("value", + ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)}), + ::parquet::schema::GroupNode::Make( + "entry2", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make( + "key", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make("value", + ::parquet::Repetition::OPTIONAL, + ::parquet::Type::INT32)}), + }, + ::parquet::ConvertedType::MAP); + EXPECT_FALSE(build_status({map_with_two_fields}).ok()); + + const auto non_repeated_map_entry = ::parquet::schema::GroupNode::Make( + "bad_map", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "key_value", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY, + ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make( + "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)})}, + ::parquet::ConvertedType::MAP); + EXPECT_FALSE(build_status({non_repeated_map_entry}).ok()); + + const auto map_entry_with_one_child = ::parquet::schema::GroupNode::Make( + "bad_map", ::parquet::Repetition::OPTIONAL, + {::parquet::schema::GroupNode::Make( + "key_value", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY, + ::parquet::ConvertedType::UTF8)})}, + ::parquet::ConvertedType::MAP); + EXPECT_FALSE(build_status({map_entry_with_one_child}).ok()); + + const auto repeated_map = ::parquet::schema::GroupNode::Make( + "repeated_map", ::parquet::Repetition::REPEATED, + {::parquet::schema::GroupNode::Make( + "key_value", ::parquet::Repetition::REPEATED, + {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY, + ::parquet::ConvertedType::UTF8), + ::parquet::schema::PrimitiveNode::Make( + "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)})}, + ::parquet::ConvertedType::MAP); + EXPECT_FALSE(build_status({repeated_map}).ok()); +} + +TEST(ParquetSchemaTest, LogicalUtcTimeIsRejected) { + const auto adjusted_time = ::parquet::schema::PrimitiveNode::Make( + "time_ms", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::MILLIS), + ::parquet::Type::INT32); + const auto status = build_status({adjusted_time}); + EXPECT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Parquet TIME with isAdjustedToUTC=true is not supported"), + std::string::npos); +} + +} // namespace doris::format::parquet diff --git a/be/test/format_v2/parquet/parquet_serde_reader_test.cpp b/be/test/format_v2/parquet/parquet_serde_reader_test.cpp new file mode 100644 index 00000000000000..c35138e3263723 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_serde_reader_test.cpp @@ -0,0 +1,459 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_decimal.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_nullable.h" +#include "core/types.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "format_v2/parquet/reader/column_reader.h" + +namespace doris::format::parquet { +namespace { + +constexpr int64_t ROW_COUNT = 5; + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +class ParquetSerdeReaderTest : public testing::Test { +protected: + void SetUp() override { + _test_dir = std::filesystem::temp_directory_path() / "doris_parquet_serde_reader_test"; + std::filesystem::remove_all(_test_dir); + std::filesystem::create_directories(_test_dir); + _file_path = (_test_dir / "serde.parquet").string(); + write_parquet_file(); + open_file(_file_path); + } + + void TearDown() override { std::filesystem::remove_all(_test_dir); } + + template + std::shared_ptr build_required_array(const std::vector& values) { + Builder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_nullable_int32_array() { + arrow::Int32Builder builder; + EXPECT_TRUE(builder.Append(1).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append(3).ok()); + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append(5).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_nullable_float16_array() { + arrow::HalfFloatBuilder builder; + EXPECT_TRUE(builder.AppendNull().ok()); + EXPECT_TRUE(builder.Append(0x0000).ok()); + EXPECT_TRUE(builder.Append(0x8000).ok()); + EXPECT_TRUE(builder.Append(0x3E00).ok()); + EXPECT_TRUE(builder.Append(0x7E00).ok()); + return finish_array(&builder); + } + + std::shared_ptr build_binary_array(const std::vector& values) { + arrow::BinaryBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(reinterpret_cast(value.data()), + static_cast(value.size())) + .ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_fixed_binary_array( + const std::shared_ptr& type, const std::vector& values) { + arrow::FixedSizeBinaryBuilder builder(type, arrow::default_memory_pool()); + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(reinterpret_cast(value.data())).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_timestamp_array( + const std::shared_ptr& type, const std::vector& values) { + arrow::TimestampBuilder builder(type, arrow::default_memory_pool()); + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); + } + + std::shared_ptr build_decimal_array(const std::shared_ptr& type, + const std::vector& values) { + arrow::Decimal128Builder builder(type, arrow::default_memory_pool()); + for (const auto value : values) { + EXPECT_TRUE(builder.Append(arrow::Decimal128(value)).ok()); + } + return finish_array(&builder); + } + + void add_field(const std::shared_ptr& field, + std::shared_ptr array) { + _arrow_fields.push_back(field); + _arrays.push_back(std::move(array)); + } + + void write_table(const std::string& file_path, const std::shared_ptr& table, + std::shared_ptr<::parquet::ArrowWriterProperties> arrow_properties = nullptr) { + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + if (arrow_properties == nullptr) { + ::parquet::ArrowWriterProperties::Builder arrow_builder; + arrow_properties = arrow_builder.build(); + } + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable( + *table, arrow::default_memory_pool(), *file_result, ROW_COUNT, + writer_builder.build(), std::move(arrow_properties))); + } + + void write_parquet_file() { + add_field(arrow::field("bool_col", arrow::boolean(), false), + build_required_array( + {true, false, true, false, true})); + add_field(arrow::field("int32_col", arrow::int32(), false), + build_required_array({10, 20, 30, 40, 50})); + add_field(arrow::field("int64_col", arrow::int64(), false), + build_required_array( + {10000000000L, -9L, 42L, 77L, 123L})); + add_field(arrow::field("uint32_col", arrow::uint32(), false), + build_required_array( + {0U, 1U, 1U << 31, std::numeric_limits::max(), 42U})); + add_field(arrow::field("uint64_col", arrow::uint64(), false), + build_required_array( + {0ULL, 1ULL, 1ULL << 63, std::numeric_limits::max(), 42ULL})); + add_field(arrow::field("float_col", arrow::float32(), false), + build_required_array( + {1.5F, -2.25F, 3.0F, 4.5F, 5.75F})); + add_field(arrow::field("double_col", arrow::float64(), false), + build_required_array({3.5, -4.75, 6.0, 7.25, 8.5})); + add_field(arrow::field("nullable_float16_col", arrow::float16(), true), + build_nullable_float16_array()); + add_field(arrow::field("binary_col", arrow::binary(), false), + build_binary_array({"bin_a", "bin_b", "bin_c", "bin_d", "bin_e"})); + add_field(arrow::field("string_col", arrow::utf8(), false), + build_string_array({"alpha", "beta", "gamma", "delta", "epsilon"})); + add_field(arrow::field("fixed_binary_col", arrow::fixed_size_binary(4), false), + build_fixed_binary_array(arrow::fixed_size_binary(4), + {"aaaa", "bbbb", "cccc", "dddd", "eeee"})); + add_field(arrow::field("date_col", arrow::date32(), false), + build_required_array({0, 1, 18628, 18629, 18630})); + add_field(arrow::field("timestamp_millis_col", arrow::timestamp(arrow::TimeUnit::MILLI), + false), + build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MILLI), + {0, 1234, 1609459200000, 1609459201000, -1})); + add_field(arrow::field("timestamp_micros_col", arrow::timestamp(arrow::TimeUnit::MICRO), + false), + build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO), + {0, 1234567, 1609459200000000, 1609459201000000, -1})); + add_field(arrow::field("timestamp_micros_utc_col", + arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), false), + build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), + {0, 1234567, 1609459200000000, 1609459201000000, -1})); + add_field(arrow::field("decimal_fixed_binary_9_2_col", arrow::decimal128(9, 2), false), + build_decimal_array(arrow::decimal128(9, 2), {12345, -67, 0, 987, 1000})); + add_field(arrow::field("decimal_fixed_binary_18_6_col", arrow::decimal128(18, 6), false), + build_decimal_array(arrow::decimal128(18, 6), + {1234567, -670000, 0, 9870000, 1000000})); + add_field(arrow::field("nullable_int_col", arrow::int32(), true), + build_nullable_int32_array()); + + write_table(_file_path, arrow::Table::Make(arrow::schema(_arrow_fields), _arrays)); + } + + void open_file(const std::string& file_path) { + _file_reader = ::parquet::ParquetFileReader::OpenFile(file_path, false); + ASSERT_NE(_file_reader, nullptr); + ASSERT_EQ(_file_reader->metadata()->num_row_groups(), 1); + _row_group = _file_reader->RowGroup(0); + ASSERT_NE(_row_group, nullptr); + auto schema_descriptor = _file_reader->metadata()->schema(); + ASSERT_NE(schema_descriptor, nullptr); + auto st = build_parquet_column_schema(*schema_descriptor, &_fields); + ASSERT_TRUE(st.ok()) << st; + } + + size_t find_field_idx(const std::string& name) const { + for (size_t field_idx = 0; field_idx < _fields.size(); ++field_idx) { + if (_fields[field_idx]->name == name) { + return field_idx; + } + } + ADD_FAILURE() << "Cannot find parquet serde test field " << name; + return _fields.size(); + } + + std::unique_ptr create_reader(size_t field_idx) const { + ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns()); + std::unique_ptr reader; + auto st = factory.create(*_fields[field_idx], &reader); + EXPECT_TRUE(st.ok()) << st; + return reader; + } + + template + void read_and_validate(const std::string& name, Validator validator) const { + const auto field_idx = find_field_idx(name); + ASSERT_TRUE(supports_record_reader(_fields[field_idx]->type_descriptor)); + auto reader = create_reader(field_idx); + ASSERT_NE(reader, nullptr); + MutableColumnPtr column = reader->type()->create_column(); + int64_t rows_read = 0; + auto st = reader->read(ROW_COUNT, column, &rows_read); + ASSERT_TRUE(st.ok()) << st; + ASSERT_EQ(rows_read, ROW_COUNT); + ASSERT_EQ(column->size(), ROW_COUNT); + validator(*_fields[field_idx], *column); + } + + std::filesystem::path _test_dir; + std::string _file_path; + std::unique_ptr<::parquet::ParquetFileReader> _file_reader; + std::shared_ptr<::parquet::RowGroupReader> _row_group; + std::vector> _fields; + std::vector> _arrow_fields; + std::vector> _arrays; +}; + +TEST_F(ParquetSerdeReaderTest, ReadAllSupportedPhysicalAndLogicalTypes) { + read_and_validate("bool_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::BOOLEAN); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(0), 1); + EXPECT_EQ(values.get_element(1), 0); + EXPECT_EQ(values.get_element(4), 1); + }); + read_and_validate("int32_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(0), 10); + EXPECT_EQ(values.get_element(4), 50); + }); + read_and_validate("int64_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(0), 10000000000L); + EXPECT_EQ(values.get_element(1), -9L); + }); + read_and_validate("uint32_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32); + EXPECT_TRUE(schema.type_descriptor.is_unsigned_integer); + EXPECT_EQ(schema.type_descriptor.integer_bit_width, 32); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_BIGINT); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(2), 2147483648L); + EXPECT_EQ(values.get_element(3), + static_cast(std::numeric_limits::max())); + }); + read_and_validate("uint64_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64); + EXPECT_TRUE(schema.type_descriptor.is_unsigned_integer); + EXPECT_EQ(schema.type_descriptor.integer_bit_width, 64); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_LARGEINT); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(2), static_cast(1) << 63); + EXPECT_EQ(values.get_element(3), + static_cast(std::numeric_limits::max())); + }); + read_and_validate("float_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FLOAT); + const auto& values = assert_cast(column); + EXPECT_FLOAT_EQ(values.get_element(0), 1.5F); + EXPECT_FLOAT_EQ(values.get_element(1), -2.25F); + }); + read_and_validate("double_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::DOUBLE); + const auto& values = assert_cast(column); + EXPECT_DOUBLE_EQ(values.get_element(0), 3.5); + EXPECT_DOUBLE_EQ(values.get_element(1), -4.75); + }); + read_and_validate("nullable_float16_col", [](const ParquetColumnSchema& schema, + const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY); + EXPECT_EQ(schema.type_descriptor.fixed_length, 2); + EXPECT_EQ(schema.type_descriptor.extra_type_info, ParquetExtraTypeInfo::FLOAT16); + EXPECT_FALSE(schema.type_descriptor.is_string_like); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_FLOAT); + const auto& nullable_column = assert_cast(column); + const auto& values = assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_TRUE(nullable_column.is_null_at(0)); + EXPECT_FLOAT_EQ(values.get_element(1), 0.0F); + EXPECT_FALSE(std::signbit(values.get_element(1))); + EXPECT_FLOAT_EQ(values.get_element(2), -0.0F); + EXPECT_TRUE(std::signbit(values.get_element(2))); + EXPECT_FLOAT_EQ(values.get_element(3), 1.5F); + EXPECT_TRUE(std::isnan(values.get_element(4))); + }); + read_and_validate("binary_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::BYTE_ARRAY); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_data_at(0).to_string(), "bin_a"); + EXPECT_EQ(values.get_data_at(3).to_string(), "bin_d"); + }); + read_and_validate("string_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type_descriptor.is_string_like); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_data_at(0).to_string(), "alpha"); + EXPECT_EQ(values.get_data_at(4).to_string(), "epsilon"); + }); + read_and_validate("fixed_binary_col", [](const ParquetColumnSchema& schema, + const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY); + EXPECT_EQ(schema.type_descriptor.fixed_length, 4); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_data_at(0).to_string(), "aaaa"); + EXPECT_EQ(values.get_data_at(2).to_string(), "cccc"); + }); + read_and_validate("date_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATEV2); + EXPECT_EQ(schema.type->to_string(column, 0), "1970-01-01"); + EXPECT_EQ(schema.type->to_string(column, 2), "2021-01-01"); + }); + read_and_validate( + "timestamp_millis_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2); + EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234"); + EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999"); + }); + read_and_validate( + "timestamp_micros_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2); + EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234567"); + EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999999"); + }); + read_and_validate("timestamp_micros_utc_col", [](const ParquetColumnSchema& schema, + const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64); + EXPECT_TRUE(schema.type_descriptor.timestamp_is_adjusted_to_utc); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2); + EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234567"); + EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999999"); + }); + read_and_validate("decimal_fixed_binary_9_2_col", [](const ParquetColumnSchema& schema, + const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY); + EXPECT_TRUE(schema.type_descriptor.is_decimal); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DECIMAL32); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(0), Decimal32(12345)); + EXPECT_EQ(schema.type->to_string(column, 0), "123.45"); + }); + read_and_validate("decimal_fixed_binary_18_6_col", [](const ParquetColumnSchema& schema, + const IColumn& column) { + EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY); + EXPECT_TRUE(schema.type_descriptor.is_decimal); + EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DECIMAL64); + const auto& values = assert_cast(column); + EXPECT_EQ(values.get_element(0), Decimal64(1234567)); + EXPECT_EQ(schema.type->to_string(column, 0), "1.234567"); + }); + read_and_validate( + "nullable_int_col", [](const ParquetColumnSchema& schema, const IColumn& column) { + EXPECT_TRUE(schema.type->is_nullable()); + const auto& nullable_column = assert_cast(column); + const auto& nested_column = + assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(nullable_column.size(), ROW_COUNT); + EXPECT_FALSE(nullable_column.is_null_at(0)); + EXPECT_TRUE(nullable_column.is_null_at(1)); + EXPECT_FALSE(nullable_column.is_null_at(2)); + EXPECT_TRUE(nullable_column.is_null_at(3)); + EXPECT_EQ(nested_column.get_element(0), 1); + EXPECT_EQ(nested_column.get_element(2), 3); + }); +} + +TEST_F(ParquetSerdeReaderTest, ReadInt96TimestampAsDateTimeV2) { + const auto file_path = (_test_dir / "int96_timestamp.parquet").string(); + auto field = arrow::field("col_datetime", arrow::timestamp(arrow::TimeUnit::MICRO), false); + auto array = build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO), + {0, 1234567, 1609459200000000, 1609459201000000, -1}); + auto table = arrow::Table::Make(arrow::schema({field}), {array}); + + ::parquet::ArrowWriterProperties::Builder arrow_builder; + arrow_builder.enable_force_write_int96_timestamps(); + _fields.clear(); + _file_reader.reset(); + _row_group.reset(); + write_table(file_path, table, arrow_builder.build()); + open_file(file_path); + + ASSERT_EQ(_fields.size(), 1); + EXPECT_EQ(_fields[0]->type_descriptor.physical_type, ::parquet::Type::INT96); + EXPECT_EQ(_fields[0]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::IMPALA_TIMESTAMP); + ASSERT_TRUE(supports_record_reader(_fields[0]->type_descriptor)); + ASSERT_EQ(remove_nullable(_fields[0]->type)->get_primitive_type(), TYPE_DATETIMEV2); + + auto reader = create_reader(0); + ASSERT_NE(reader, nullptr); + auto column = _fields[0]->type->create_column(); + int64_t rows_read = 0; + ASSERT_TRUE(reader->read(ROW_COUNT, column, &rows_read).ok()); + ASSERT_EQ(rows_read, ROW_COUNT); + EXPECT_EQ(_fields[0]->type->to_string(*column, 0), "1970-01-01 00:00:00.000000"); + EXPECT_EQ(_fields[0]->type->to_string(*column, 1), "1970-01-01 00:00:01.234567"); + EXPECT_EQ(_fields[0]->type->to_string(*column, 2), "2021-01-01 00:00:00.000000"); + EXPECT_EQ(_fields[0]->type->to_string(*column, 4), "1969-12-31 23:59:59.999999"); +} + +} // namespace +} // namespace doris::format::parquet diff --git a/be/test/format_v2/parquet/parquet_statistics_test.cpp b/be/test/format_v2/parquet/parquet_statistics_test.cpp new file mode 100644 index 00000000000000..f2ae2448013d26 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_statistics_test.cpp @@ -0,0 +1,460 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_statistics.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/field.h" +#include "format_v2/file_reader.h" +#include "format_v2/parquet/parquet_column_schema.h" +#include "storage/predicate/accept_null_predicate.h" +#include "storage/predicate/null_predicate.h" +#include "storage/predicate/predicate_creator.h" + +namespace doris { +namespace { + +format::parquet::ParquetColumnSchema primitive_bloom_schema(const DataTypePtr& type) { + format::parquet::ParquetColumnSchema schema; + schema.local_id = 0; + schema.name = "c0"; + schema.type = type; + schema.leaf_column_id = 0; + schema.kind = format::parquet::ParquetColumnSchemaKind::PRIMITIVE; + return schema; +} + +format::FileColumnPredicateFilter bloom_filter_with_predicate( + const std::shared_ptr& predicate) { + format::FileColumnPredicateFilter filter; + filter.file_column_id = format::LocalColumnId(0); + filter.target = format::FileNestedPredicateTarget(filter.file_column_id); + filter.predicates.push_back(predicate); + return filter; +} + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +std::shared_ptr int32_array(const std::vector>& values) { + arrow::Int32Builder builder; + for (const auto& value : values) { + if (value.has_value()) { + EXPECT_TRUE(builder.Append(*value).ok()); + } else { + EXPECT_TRUE(builder.AppendNull().ok()); + } + } + return finish_array(&builder); +} + +std::shared_ptr uint32_array(const std::vector& values) { + arrow::UInt32Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr timestamp_array(const std::vector& values) { + arrow::TimestampBuilder builder(arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), + arrow::default_memory_pool()); + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::unique_ptr<::parquet::ParquetFileReader> make_reader( + const std::shared_ptr& table, int64_t row_group_size, bool enable_dictionary, + bool enable_statistics) { + auto out_result = arrow::io::BufferOutputStream::Create(); + EXPECT_TRUE(out_result.ok()); + auto out = *out_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.compression(::parquet::Compression::UNCOMPRESSED); + if (enable_dictionary) { + builder.enable_dictionary(); + } else { + builder.disable_dictionary(); + } + if (!enable_statistics) { + builder.disable_statistics(); + } + EXPECT_TRUE(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + row_group_size, builder.build()) + .ok()); + auto buffer_result = out->Finish(); + EXPECT_TRUE(buffer_result.ok()); + return ::parquet::ParquetFileReader::Open( + std::make_shared(*buffer_result)); +} + +std::vector> build_file_schema( + const ::parquet::ParquetFileReader& reader) { + std::vector> file_schema; + EXPECT_TRUE( + format::parquet::build_parquet_column_schema(*reader.metadata()->schema(), &file_schema) + .ok()); + return file_schema; +} + +format::FileScanRequest request_with_filter(format::FileColumnPredicateFilter filter) { + format::FileScanRequest request; + request.column_predicate_filters.push_back(std::move(filter)); + return request; +} + +::parquet::BlockSplitBloomFilter bloom_filter_for_int32_values(const std::vector& values) { + ::parquet::BlockSplitBloomFilter bloom_filter; + bloom_filter.Init(::parquet::BlockSplitBloomFilter::kMinimumBloomFilterBytes); + for (const auto value : values) { + bloom_filter.InsertHash(bloom_filter.Hash(value)); + } + return bloom_filter; +} + +TEST(ParquetStatisticsTransformTest, ConvertsMinMaxNullCountUnsignedStringAndTimestamp) { + auto table = arrow::Table::Make( + arrow::schema({ + arrow::field("i", arrow::int32(), true), + arrow::field("u", arrow::uint32(), false), + arrow::field("s", arrow::utf8(), false), + arrow::field("ts", arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), false), + }), + {int32_array({1, std::nullopt, 5}), uint32_array({7, 9, 11}), + string_array({"alpha", "beta", "omega"}), timestamp_array({1000, 2000, 3000})}); + auto reader = make_reader(table, 3, false, true); + auto schema = build_file_schema(*reader); + auto row_group = reader->metadata()->RowGroup(0); + + const auto int_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics( + *schema[0], row_group->ColumnChunk(0)->statistics()); + EXPECT_TRUE(int_stats.has_min_max); + EXPECT_TRUE(int_stats.has_null_count); + EXPECT_TRUE(int_stats.has_null); + EXPECT_TRUE(int_stats.has_not_null); + EXPECT_EQ(int_stats.min_value.get(), 1); + EXPECT_EQ(int_stats.max_value.get(), 5); + + const auto uint_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics( + *schema[1], row_group->ColumnChunk(1)->statistics()); + EXPECT_TRUE(uint_stats.has_min_max); + EXPECT_EQ(uint_stats.min_value.get(), 7); + EXPECT_EQ(uint_stats.max_value.get(), 11); + + const auto string_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics( + *schema[2], row_group->ColumnChunk(2)->statistics()); + EXPECT_TRUE(string_stats.has_min_max); + EXPECT_EQ(string_stats.min_value.get(), "alpha"); + EXPECT_EQ(string_stats.max_value.get(), "omega"); + + auto utc = cctz::utc_time_zone(); + const auto timestamp_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics( + *schema[3], row_group->ColumnChunk(3)->statistics(), &utc); + EXPECT_TRUE(timestamp_stats.has_min_max); + EXPECT_EQ(timestamp_stats.min_value.get_type(), TYPE_DATETIMEV2); + EXPECT_EQ(timestamp_stats.max_value.get_type(), TYPE_DATETIMEV2); + EXPECT_LT(timestamp_stats.min_value, timestamp_stats.max_value); +} + +TEST(ParquetStatisticsTransformTest, HandlesMissingStatisticsAndAllNullChunks) { + auto no_stats_table = arrow::Table::Make( + arrow::schema({arrow::field("i", arrow::int32(), true)}), {int32_array({1, 2, 3})}); + auto no_stats_reader = make_reader(no_stats_table, 3, false, false); + auto no_stats_schema = build_file_schema(*no_stats_reader); + auto no_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics( + *no_stats_schema[0], + no_stats_reader->metadata()->RowGroup(0)->ColumnChunk(0)->statistics()); + EXPECT_FALSE(no_stats.has_min_max); + + auto all_null_table = + arrow::Table::Make(arrow::schema({arrow::field("i", arrow::int32(), true)}), + {int32_array({std::nullopt, std::nullopt})}); + auto all_null_reader = make_reader(all_null_table, 2, false, true); + auto all_null_schema = build_file_schema(*all_null_reader); + auto all_null_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics( + *all_null_schema[0], + all_null_reader->metadata()->RowGroup(0)->ColumnChunk(0)->statistics()); + EXPECT_TRUE(all_null_stats.has_null_count); + EXPECT_TRUE(all_null_stats.has_null); + EXPECT_FALSE(all_null_stats.has_not_null); + EXPECT_FALSE(all_null_stats.has_min_max); +} + +TEST(ParquetStatisticsPruningTest, StatisticsPredicatesAndNullPredicatesPruneRowGroups) { + auto table = arrow::Table::Make(arrow::schema({arrow::field("i", arrow::int32(), true)}), + {int32_array({std::nullopt, std::nullopt, 3, 4, 5, 6})}); + auto reader = make_reader(table, 2, false, true); + auto schema = build_file_schema(*reader); + + format::FileColumnPredicateFilter ge_filter; + ge_filter.file_column_id = format::LocalColumnId(0); + ge_filter.predicates.push_back(create_comparison_predicate( + 0, "i", schema[0]->type, Field::create_field(5), false)); + std::vector selected; + format::parquet::ParquetPruningStats pruning_stats; + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *reader->metadata(), reader.get(), schema, request_with_filter(ge_filter), + nullptr, &selected, false, &pruning_stats) + .ok()); + EXPECT_EQ(selected, std::vector({2})); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_statistics, 2); + + format::FileColumnPredicateFilter is_not_null_filter; + is_not_null_filter.file_column_id = format::LocalColumnId(0); + is_not_null_filter.predicates.push_back( + std::make_shared(0, "i", false, TYPE_INT)); + selected.clear(); + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *reader->metadata(), reader.get(), schema, + request_with_filter(is_not_null_filter), nullptr, &selected, false, + &pruning_stats) + .ok()); + EXPECT_EQ(selected, std::vector({1, 2})); + + format::FileColumnPredicateFilter is_null_filter; + is_null_filter.file_column_id = format::LocalColumnId(0); + is_null_filter.predicates.push_back(std::make_shared(0, "i", true, TYPE_INT)); + selected.clear(); + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *reader->metadata(), reader.get(), schema, + request_with_filter(is_null_filter), nullptr, &selected, false, + &pruning_stats) + .ok()); + EXPECT_EQ(selected, std::vector({0})); +} + +TEST(ParquetStatisticsPruningTest, DictionaryPruningHandlesExcludeIncludeAndUnsupportedPaths) { + auto table = arrow::Table::Make(arrow::schema({arrow::field("s", arrow::utf8(), false)}), + {string_array({"alpha", "beta", "gamma", "omega"})}); + auto reader = make_reader(table, 2, true, false); + auto schema = build_file_schema(*reader); + + format::FileColumnPredicateFilter absent_filter; + absent_filter.file_column_id = format::LocalColumnId(0); + absent_filter.predicates.push_back(create_comparison_predicate( + 0, "s", schema[0]->type, Field::create_field("missing"), false)); + std::vector selected; + format::parquet::ParquetPruningStats pruning_stats; + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *reader->metadata(), reader.get(), schema, + request_with_filter(absent_filter), nullptr, &selected, false, + &pruning_stats) + .ok()); + EXPECT_TRUE(selected.empty()); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 2); + + format::FileColumnPredicateFilter present_filter; + present_filter.file_column_id = format::LocalColumnId(0); + present_filter.predicates.push_back(create_comparison_predicate( + 0, "s", schema[0]->type, Field::create_field("gamma"), false)); + selected.clear(); + pruning_stats = {}; + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *reader->metadata(), reader.get(), schema, + request_with_filter(present_filter), nullptr, &selected, false, + &pruning_stats) + .ok()); + EXPECT_EQ(selected, std::vector({1})); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 1); + + auto plain_reader = make_reader(table, 2, false, false); + auto plain_schema = build_file_schema(*plain_reader); + selected.clear(); + pruning_stats = {}; + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *plain_reader->metadata(), plain_reader.get(), plain_schema, + request_with_filter(absent_filter), nullptr, &selected, false, + &pruning_stats) + .ok()); + EXPECT_EQ(selected, std::vector({0, 1})); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 0); +} + +TEST(ParquetStatisticsPruningTest, StatisticsRunsBeforeDictionaryAndMissingBloomKeepsRows) { + auto table = arrow::Table::Make(arrow::schema({arrow::field("s", arrow::utf8(), false)}), + {string_array({"alpha", "beta", "gamma", "omega"})}); + auto reader = make_reader(table, 2, true, true); + auto schema = build_file_schema(*reader); + + format::FileColumnPredicateFilter beyond_max_filter; + beyond_max_filter.file_column_id = format::LocalColumnId(0); + beyond_max_filter.predicates.push_back(create_comparison_predicate( + 0, "s", schema[0]->type, Field::create_field("zzzz"), false)); + std::vector selected; + format::parquet::ParquetPruningStats pruning_stats; + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *reader->metadata(), reader.get(), schema, + request_with_filter(beyond_max_filter), nullptr, &selected, true, + &pruning_stats) + .ok()); + EXPECT_TRUE(selected.empty()); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_statistics, 2); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_dictionary, 0); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_bloom_filter, 0); + + auto no_stats_reader = make_reader(table, 2, false, false); + auto no_stats_schema = build_file_schema(*no_stats_reader); + format::FileColumnPredicateFilter missing_bloom_filter; + missing_bloom_filter.file_column_id = format::LocalColumnId(0); + missing_bloom_filter.predicates.push_back(create_comparison_predicate( + 0, "s", no_stats_schema[0]->type, Field::create_field("absent"), false)); + selected.clear(); + pruning_stats = {}; + ASSERT_TRUE(format::parquet::select_row_groups_by_statistics( + *no_stats_reader->metadata(), no_stats_reader.get(), no_stats_schema, + request_with_filter(missing_bloom_filter), nullptr, &selected, true, + &pruning_stats) + .ok()); + EXPECT_EQ(selected, std::vector({0, 1})); + EXPECT_EQ(pruning_stats.filtered_row_groups_by_bloom_filter, 0); +} + +::parquet::BlockSplitBloomFilter bloom_filter_for_string_values( + const std::vector& values) { + ::parquet::BlockSplitBloomFilter bloom_filter; + bloom_filter.Init(::parquet::BlockSplitBloomFilter::kMinimumBloomFilterBytes); + for (const auto& value : values) { + ::parquet::ByteArray byte_array(static_cast(value.size()), + reinterpret_cast(value.data())); + bloom_filter.InsertHash(bloom_filter.Hash(&byte_array)); + } + return bloom_filter; +} + +TEST(ParquetBloomFilterPruningTest, EqPredicateUsesArrowHashAndPrunesAbsentIntValue) { + auto schema = primitive_bloom_schema(std::make_shared()); + auto bloom_filter = bloom_filter_for_int32_values({1, 3}); + auto absent_filter = bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", schema.type, Field::create_field(2), false)); + auto present_filter = + bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", schema.type, Field::create_field(3), false)); + + EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, absent_filter, + bloom_filter)); + EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes( + schema, present_filter, bloom_filter)); +} + +TEST(ParquetBloomFilterPruningTest, InPredicatePrunesOnlyWhenAllValuesAreAbsent) { + auto schema = primitive_bloom_schema(std::make_shared()); + auto bloom_filter = bloom_filter_for_int32_values({1, 3}); + + auto absent_set = build_set(); + int32_t absent_first = 2; + int32_t absent_second = 4; + absent_set->insert(&absent_first); + absent_set->insert(&absent_second); + auto absent_filter = + bloom_filter_with_predicate(create_in_list_predicate( + 0, "c0", schema.type, absent_set, false)); + + auto present_set = build_set(); + int32_t present_first = 2; + int32_t present_second = 3; + present_set->insert(&present_first); + present_set->insert(&present_second); + auto present_filter = + bloom_filter_with_predicate(create_in_list_predicate( + 0, "c0", schema.type, present_set, false)); + + EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, absent_filter, + bloom_filter)); + EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes( + schema, present_filter, bloom_filter)); +} + +TEST(ParquetBloomFilterPruningTest, BooleanPredicateHashesAsParquetInt32) { + auto schema = primitive_bloom_schema(std::make_shared()); + auto bloom_filter = bloom_filter_for_int32_values({1}); + auto false_filter = bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", schema.type, Field::create_field(false), false)); + auto true_filter = bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", schema.type, Field::create_field(true), false)); + + EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, false_filter, + bloom_filter)); + EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, true_filter, + bloom_filter)); +} + +TEST(ParquetBloomFilterPruningTest, StringPredicateUsesArrowByteArrayHash) { + auto schema = primitive_bloom_schema(std::make_shared()); + auto bloom_filter = bloom_filter_for_string_values({"alpha", "omega"}); + auto absent_filter = bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", schema.type, Field::create_field("beta"), false)); + auto present_filter = + bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", schema.type, Field::create_field("alpha"), false)); + + EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(schema, absent_filter, + bloom_filter)); + EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes( + schema, present_filter, bloom_filter)); +} + +TEST(ParquetBloomFilterPruningTest, NullableAcceptingAndUnsupportedPredicatesKeepRowGroup) { + auto schema = primitive_bloom_schema(std::make_shared()); + auto bloom_filter = bloom_filter_for_int32_values({1}); + auto nested_predicate = create_comparison_predicate( + 0, "c0", schema.type, Field::create_field(2), false); + auto accept_null_filter = + bloom_filter_with_predicate(std::make_shared(nested_predicate)); + EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes( + schema, accept_null_filter, bloom_filter)); + + auto unsupported_schema = primitive_bloom_schema(std::make_shared()); + auto unsupported_filter = + bloom_filter_with_predicate(create_comparison_predicate( + 0, "c0", unsupported_schema.type, Field::create_field(2), + false)); + EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes( + unsupported_schema, unsupported_filter, bloom_filter)); +} + +} // namespace +} // namespace doris diff --git a/be/test/format_v2/parquet/parquet_type_test.cpp b/be/test/format_v2/parquet/parquet_type_test.cpp new file mode 100644 index 00000000000000..4bca77c1803b49 --- /dev/null +++ b/be/test/format_v2/parquet/parquet_type_test.cpp @@ -0,0 +1,494 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/parquet/parquet_type.h" + +#include +#include +#include +#include +#include +#include + +#include + +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/primitive_type.h" + +namespace doris::format::parquet { +namespace { + +::parquet::SchemaDescriptor make_descriptor(const ::parquet::schema::NodePtr& node) { + auto schema = + ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, {node}); + ::parquet::SchemaDescriptor descriptor; + descriptor.Init(schema); + return descriptor; +} + +ParquetTypeDescriptor resolve_node(const ::parquet::schema::NodePtr& node) { + auto descriptor = make_descriptor(node); + return resolve_parquet_type(descriptor.Column(0)); +} + +PrimitiveType primitive_type(const DataTypePtr& type) { + return remove_nullable(type)->get_primitive_type(); +} + +int scale_of(const DataTypePtr& type) { + return remove_nullable(type)->get_scale(); +} + +std::shared_ptr make_float16_array() { + arrow::HalfFloatBuilder builder; + EXPECT_TRUE(builder.Append(0x3E00).ok()); + std::shared_ptr array; + EXPECT_TRUE(builder.Finish(&array).ok()); + return array; +} + +ParquetTypeDescriptor resolve_arrow_float16_type() { + const auto schema = arrow::schema({arrow::field("f16", arrow::float16(), true)}); + const auto table = arrow::Table::Make(schema, {make_float16_array()}); + auto out_result = arrow::io::BufferOutputStream::Create(); + EXPECT_TRUE(out_result.ok()); + auto out = *out_result; + EXPECT_TRUE(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1).ok()); + auto buffer_result = out->Finish(); + EXPECT_TRUE(buffer_result.ok()); + + auto reader = ::parquet::ParquetFileReader::Open( + std::make_shared(*buffer_result)); + return resolve_parquet_type(reader->metadata()->schema()->Column(0)); +} + +} // namespace + +TEST(ParquetTypeTest, ResolveLogicalIntegerMappings) { + struct Case { + int bit_width; + bool is_signed; + PrimitiveType expected_type; + bool expected_unsigned; + }; + const std::vector cases = { + {8, true, TYPE_TINYINT, false}, {8, false, TYPE_SMALLINT, true}, + {16, true, TYPE_SMALLINT, false}, {16, false, TYPE_INT, true}, + {32, true, TYPE_INT, false}, {32, false, TYPE_BIGINT, true}, + {64, true, TYPE_BIGINT, false}, {64, false, TYPE_LARGEINT, true}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.bit_width); + const auto node = ::parquet::schema::PrimitiveNode::Make( + "c", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Int(test_case.bit_width, test_case.is_signed), + test_case.bit_width == 64 ? ::parquet::Type::INT64 : ::parquet::Type::INT32); + const auto type = resolve_node(node); + ASSERT_NE(type.doris_type, nullptr); + EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type); + EXPECT_EQ(type.integer_bit_width, test_case.bit_width); + EXPECT_EQ(type.is_unsigned_integer, test_case.expected_unsigned); + EXPECT_TRUE(type.supports_record_reader); + } +} + +TEST(ParquetTypeTest, ResolveLogicalTimeAndTimestampMappings) { + const auto time_millis = resolve_node(::parquet::schema::PrimitiveNode::Make( + "time_ms", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::MILLIS), + ::parquet::Type::INT32)); + ASSERT_NE(time_millis.doris_type, nullptr); + EXPECT_EQ(primitive_type(time_millis.doris_type), TYPE_TIMEV2); + EXPECT_EQ(time_millis.time_unit, ParquetTimeUnit::MILLIS); + EXPECT_EQ(time_millis.extra_type_info, ParquetExtraTypeInfo::UNIT_MS); + + const auto time_micros = resolve_node(::parquet::schema::PrimitiveNode::Make( + "time_us", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::MICROS), + ::parquet::Type::INT64)); + ASSERT_NE(time_micros.doris_type, nullptr); + EXPECT_EQ(primitive_type(time_micros.doris_type), TYPE_TIMEV2); + EXPECT_EQ(time_micros.time_unit, ParquetTimeUnit::MICROS); + EXPECT_EQ(time_micros.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS); + + const auto adjusted_time = resolve_node(::parquet::schema::PrimitiveNode::Make( + "time_adjusted", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::MILLIS), + ::parquet::Type::INT32)); + EXPECT_EQ(adjusted_time.doris_type, nullptr); + EXPECT_FALSE(adjusted_time.supports_record_reader); + EXPECT_FALSE(adjusted_time.unsupported_reason.empty()); + + const auto timestamp_nanos = resolve_node(::parquet::schema::PrimitiveNode::Make( + "ts_ns", ::parquet::Repetition::OPTIONAL, + ::parquet::LogicalType::Timestamp(true, ::parquet::LogicalType::TimeUnit::NANOS), + ::parquet::Type::INT64)); + ASSERT_NE(timestamp_nanos.doris_type, nullptr); + EXPECT_TRUE(timestamp_nanos.doris_type->is_nullable()); + EXPECT_EQ(primitive_type(timestamp_nanos.doris_type), TYPE_DATETIMEV2); + EXPECT_TRUE(timestamp_nanos.is_timestamp); + EXPECT_TRUE(timestamp_nanos.timestamp_is_adjusted_to_utc); + EXPECT_EQ(timestamp_nanos.time_unit, ParquetTimeUnit::NANOS); + EXPECT_EQ(timestamp_nanos.extra_type_info, ParquetExtraTypeInfo::UNIT_NS); +} + +TEST(ParquetTypeTest, ResolveLogicalTimestampMatrix) { + struct Case { + ::parquet::LogicalType::TimeUnit::unit parquet_unit; + bool adjusted_to_utc; + ParquetTimeUnit expected_unit; + ParquetExtraTypeInfo expected_extra; + int expected_scale; + }; + const std::vector cases = { + {::parquet::LogicalType::TimeUnit::MILLIS, true, ParquetTimeUnit::MILLIS, + ParquetExtraTypeInfo::UNIT_MS, 3}, + {::parquet::LogicalType::TimeUnit::MILLIS, false, ParquetTimeUnit::MILLIS, + ParquetExtraTypeInfo::UNIT_MS, 3}, + {::parquet::LogicalType::TimeUnit::MICROS, true, ParquetTimeUnit::MICROS, + ParquetExtraTypeInfo::UNIT_MICROS, 6}, + {::parquet::LogicalType::TimeUnit::MICROS, false, ParquetTimeUnit::MICROS, + ParquetExtraTypeInfo::UNIT_MICROS, 6}, + {::parquet::LogicalType::TimeUnit::NANOS, true, ParquetTimeUnit::NANOS, + ParquetExtraTypeInfo::UNIT_NS, 6}, + {::parquet::LogicalType::TimeUnit::NANOS, false, ParquetTimeUnit::NANOS, + ParquetExtraTypeInfo::UNIT_NS, 6}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.expected_scale); + const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "ts", ::parquet::Repetition::OPTIONAL, + ::parquet::LogicalType::Timestamp(test_case.adjusted_to_utc, + test_case.parquet_unit), + ::parquet::Type::INT64)); + ASSERT_NE(type.doris_type, nullptr); + EXPECT_TRUE(type.doris_type->is_nullable()); + EXPECT_EQ(primitive_type(type.doris_type), TYPE_DATETIMEV2); + EXPECT_EQ(scale_of(type.doris_type), test_case.expected_scale); + EXPECT_TRUE(type.is_timestamp); + EXPECT_EQ(type.timestamp_is_adjusted_to_utc, test_case.adjusted_to_utc); + EXPECT_EQ(type.time_unit, test_case.expected_unit); + EXPECT_EQ(type.extra_type_info, test_case.expected_extra); + } +} + +TEST(ParquetTypeTest, ConvertedTimeIsRejectedButConvertedTimestampIsSupported) { + const auto converted_time = resolve_node(::parquet::schema::PrimitiveNode::Make( + "time_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32, + ::parquet::ConvertedType::TIME_MILLIS)); + EXPECT_EQ(converted_time.doris_type, nullptr); + EXPECT_FALSE(converted_time.supports_record_reader); + EXPECT_FALSE(converted_time.unsupported_reason.empty()); + + const auto converted_timestamp = resolve_node(::parquet::schema::PrimitiveNode::Make( + "ts_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64, + ::parquet::ConvertedType::TIMESTAMP_MILLIS)); + ASSERT_NE(converted_timestamp.doris_type, nullptr); + EXPECT_EQ(primitive_type(converted_timestamp.doris_type), TYPE_DATETIMEV2); + EXPECT_TRUE(converted_timestamp.is_timestamp); + EXPECT_TRUE(converted_timestamp.timestamp_is_adjusted_to_utc); + EXPECT_EQ(converted_timestamp.time_unit, ParquetTimeUnit::MILLIS); + + const auto converted_timestamp_micros = resolve_node(::parquet::schema::PrimitiveNode::Make( + "ts_us", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT64, + ::parquet::ConvertedType::TIMESTAMP_MICROS)); + ASSERT_NE(converted_timestamp_micros.doris_type, nullptr); + EXPECT_TRUE(converted_timestamp_micros.doris_type->is_nullable()); + EXPECT_EQ(primitive_type(converted_timestamp_micros.doris_type), TYPE_DATETIMEV2); + EXPECT_EQ(scale_of(converted_timestamp_micros.doris_type), 6); + EXPECT_TRUE(converted_timestamp_micros.is_timestamp); + EXPECT_TRUE(converted_timestamp_micros.timestamp_is_adjusted_to_utc); + EXPECT_EQ(converted_timestamp_micros.time_unit, ParquetTimeUnit::MICROS); + EXPECT_EQ(converted_timestamp_micros.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS); +} + +TEST(ParquetTypeTest, ResolveConvertedIntegerMappingsAndDecodedKinds) { + struct Case { + ::parquet::ConvertedType::type converted_type; + ::parquet::Type::type physical_type; + PrimitiveType expected_type; + int bit_width; + bool expected_unsigned; + DecodedValueKind expected_value_kind; + }; + const std::vector cases = { + {::parquet::ConvertedType::INT_8, ::parquet::Type::INT32, TYPE_TINYINT, 8, false, + DecodedValueKind::INT32}, + {::parquet::ConvertedType::UINT_8, ::parquet::Type::INT32, TYPE_SMALLINT, 8, true, + DecodedValueKind::INT32}, + {::parquet::ConvertedType::INT_16, ::parquet::Type::INT32, TYPE_SMALLINT, 16, false, + DecodedValueKind::INT32}, + {::parquet::ConvertedType::UINT_16, ::parquet::Type::INT32, TYPE_INT, 16, true, + DecodedValueKind::INT32}, + {::parquet::ConvertedType::INT_32, ::parquet::Type::INT32, TYPE_INT, 32, false, + DecodedValueKind::INT32}, + {::parquet::ConvertedType::UINT_32, ::parquet::Type::INT32, TYPE_BIGINT, 32, true, + DecodedValueKind::UINT32}, + {::parquet::ConvertedType::INT_64, ::parquet::Type::INT64, TYPE_BIGINT, 64, false, + DecodedValueKind::INT64}, + {::parquet::ConvertedType::UINT_64, ::parquet::Type::INT64, TYPE_LARGEINT, 64, true, + DecodedValueKind::UINT64}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.converted_type); + const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "c", ::parquet::Repetition::REQUIRED, test_case.physical_type, + test_case.converted_type)); + ASSERT_NE(type.doris_type, nullptr); + EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type); + EXPECT_EQ(type.integer_bit_width, test_case.bit_width); + EXPECT_EQ(type.is_unsigned_integer, test_case.expected_unsigned); + EXPECT_EQ(decoded_value_kind(type), test_case.expected_value_kind); + } +} + +TEST(ParquetTypeTest, ResolveConvertedDecimalCarriers) { + struct Case { + ::parquet::Type::type physical_type; + int type_length; + int precision; + int scale; + PrimitiveType expected_type; + ParquetExtraTypeInfo expected_extra; + }; + const std::vector cases = { + {::parquet::Type::INT32, -1, 9, 2, TYPE_DECIMAL32, ParquetExtraTypeInfo::DECIMAL_INT32}, + {::parquet::Type::INT64, -1, 18, 6, TYPE_DECIMAL64, + ParquetExtraTypeInfo::DECIMAL_INT64}, + {::parquet::Type::BYTE_ARRAY, -1, 20, 5, TYPE_DECIMAL128I, + ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY}, + {::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16, 38, 6, TYPE_DECIMAL128I, + ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY}, + {::parquet::Type::FIXED_LEN_BYTE_ARRAY, 20, 39, 6, TYPE_DECIMAL256, + ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.physical_type); + const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "d", ::parquet::Repetition::REQUIRED, test_case.physical_type, + ::parquet::ConvertedType::DECIMAL, test_case.type_length, test_case.precision, + test_case.scale)); + ASSERT_NE(type.doris_type, nullptr); + EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type); + EXPECT_TRUE(type.is_decimal); + EXPECT_FALSE(type.is_string_like); + EXPECT_EQ(type.decimal_precision, test_case.precision); + EXPECT_EQ(type.decimal_scale, test_case.scale); + EXPECT_EQ(type.extra_type_info, test_case.expected_extra); + } +} + +TEST(ParquetTypeTest, ResolveLogicalStringDateAndDecimalMappings) { + const std::vector> string_like_logical_types = { + ::parquet::LogicalType::String(), ::parquet::LogicalType::Enum(), + ::parquet::LogicalType::JSON(), ::parquet::LogicalType::BSON()}; + for (const auto& logical_type : string_like_logical_types) { + const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "s", ::parquet::Repetition::OPTIONAL, logical_type, ::parquet::Type::BYTE_ARRAY)); + ASSERT_NE(type.doris_type, nullptr); + EXPECT_TRUE(type.doris_type->is_nullable()); + EXPECT_EQ(primitive_type(type.doris_type), TYPE_STRING); + EXPECT_TRUE(type.is_string_like); + } + + const auto uuid = resolve_node(::parquet::schema::PrimitiveNode::Make( + "uuid", ::parquet::Repetition::OPTIONAL, ::parquet::LogicalType::UUID(), + ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16)); + ASSERT_NE(uuid.doris_type, nullptr); + EXPECT_TRUE(uuid.doris_type->is_nullable()); + EXPECT_EQ(primitive_type(uuid.doris_type), TYPE_STRING); + EXPECT_TRUE(uuid.is_string_like); + + const auto date = resolve_node(::parquet::schema::PrimitiveNode::Make( + "d", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Date(), + ::parquet::Type::INT32)); + ASSERT_NE(date.doris_type, nullptr); + EXPECT_EQ(primitive_type(date.doris_type), TYPE_DATEV2); + + const auto decimal64 = resolve_node(::parquet::schema::PrimitiveNode::Make( + "d64", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(18, 6), + ::parquet::Type::INT64)); + ASSERT_NE(decimal64.doris_type, nullptr); + EXPECT_EQ(primitive_type(decimal64.doris_type), TYPE_DECIMAL64); + EXPECT_TRUE(decimal64.is_decimal); + EXPECT_EQ(decimal64.decimal_precision, 18); + EXPECT_EQ(decimal64.decimal_scale, 6); + EXPECT_EQ(decimal64.extra_type_info, ParquetExtraTypeInfo::DECIMAL_INT64); + + const auto decimal128 = resolve_node(::parquet::schema::PrimitiveNode::Make( + "d128", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(38, 6), + ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16)); + ASSERT_NE(decimal128.doris_type, nullptr); + EXPECT_EQ(primitive_type(decimal128.doris_type), TYPE_DECIMAL128I); + EXPECT_TRUE(decimal128.is_decimal); + EXPECT_EQ(decimal128.decimal_precision, 38); + EXPECT_EQ(decimal128.decimal_scale, 6); + EXPECT_EQ(decimal128.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY); + + const auto decimal256 = resolve_node(::parquet::schema::PrimitiveNode::Make( + "d256", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(39, 6), + ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 20)); + ASSERT_NE(decimal256.doris_type, nullptr); + EXPECT_EQ(primitive_type(decimal256.doris_type), TYPE_DECIMAL256); + EXPECT_TRUE(decimal256.is_decimal); + EXPECT_EQ(decimal256.decimal_precision, 39); + EXPECT_EQ(decimal256.decimal_scale, 6); + EXPECT_EQ(decimal256.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY); + EXPECT_FALSE(decimal256.is_string_like); +} + +TEST(ParquetTypeTest, LogicalConvertedAndPhysicalFallbackLevelsAreDistinct) { + const auto logical_type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "c", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Int(8, true), + ::parquet::Type::INT32)); + ASSERT_NE(logical_type.doris_type, nullptr); + EXPECT_EQ(primitive_type(logical_type.doris_type), TYPE_TINYINT); + EXPECT_EQ(logical_type.integer_bit_width, 8); + + const auto converted_type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "c", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32, + ::parquet::ConvertedType::INT_8)); + ASSERT_NE(converted_type.doris_type, nullptr); + EXPECT_EQ(primitive_type(converted_type.doris_type), TYPE_TINYINT); + EXPECT_EQ(converted_type.integer_bit_width, 8); + + const auto physical_type = resolve_node(::parquet::schema::PrimitiveNode::Make( + "c", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32)); + ASSERT_NE(physical_type.doris_type, nullptr); + EXPECT_EQ(primitive_type(physical_type.doris_type), TYPE_INT); + EXPECT_EQ(physical_type.integer_bit_width, -1); +} + +TEST(ParquetTypeTest, ResolveDecimalStringLikeFloat16AndPhysicalFallback) { + const auto decimal256 = resolve_node(::parquet::schema::PrimitiveNode::Make( + "d", ::parquet::Repetition::REQUIRED, ::parquet::Type::FIXED_LEN_BYTE_ARRAY, + ::parquet::ConvertedType::DECIMAL, 20, 39, 6)); + ASSERT_NE(decimal256.doris_type, nullptr); + EXPECT_EQ(primitive_type(decimal256.doris_type), TYPE_DECIMAL256); + EXPECT_TRUE(decimal256.is_decimal); + EXPECT_FALSE(decimal256.is_string_like); + EXPECT_EQ(decimal256.decimal_precision, 39); + EXPECT_EQ(decimal256.decimal_scale, 6); + EXPECT_EQ(decimal256.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY); + + const auto plain_binary = resolve_node(::parquet::schema::PrimitiveNode::Make( + "s", ::parquet::Repetition::REQUIRED, ::parquet::Type::BYTE_ARRAY)); + ASSERT_NE(plain_binary.doris_type, nullptr); + EXPECT_EQ(primitive_type(plain_binary.doris_type), TYPE_STRING); + EXPECT_TRUE(plain_binary.is_string_like); + + const auto float16 = resolve_arrow_float16_type(); + ASSERT_NE(float16.doris_type, nullptr); + EXPECT_TRUE(float16.doris_type->is_nullable()); + EXPECT_EQ(float16.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY); + EXPECT_EQ(float16.fixed_length, 2); + EXPECT_EQ(primitive_type(float16.doris_type), TYPE_FLOAT); + EXPECT_EQ(float16.extra_type_info, ParquetExtraTypeInfo::FLOAT16); + EXPECT_FALSE(float16.is_string_like); + EXPECT_EQ(decoded_value_kind(float16), DecodedValueKind::FIXED_BINARY); +} + +TEST(ParquetTypeTest, ResolveNullDescriptorAndPhysicalFallback) { + const auto null_type = resolve_parquet_type(nullptr); + EXPECT_EQ(null_type.doris_type, nullptr); + EXPECT_EQ(null_type.physical_type, ::parquet::Type::UNDEFINED); + EXPECT_TRUE(null_type.supports_record_reader); + + const auto int96 = resolve_node(::parquet::schema::PrimitiveNode::Make( + "ts", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT96)); + ASSERT_NE(int96.doris_type, nullptr); + EXPECT_EQ(primitive_type(int96.doris_type), TYPE_DATETIMEV2); + EXPECT_EQ(int96.extra_type_info, ParquetExtraTypeInfo::IMPALA_TIMESTAMP); + EXPECT_EQ(decoded_value_kind(int96), DecodedValueKind::INT96); +} + +TEST(ParquetTypeTest, ResolveEveryPhysicalFallback) { + struct Case { + ::parquet::schema::NodePtr node; + PrimitiveType expected_type; + DecodedValueKind expected_kind; + bool expected_string_like = false; + }; + const std::vector cases = { + {::parquet::schema::PrimitiveNode::Make("b", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BOOLEAN), + TYPE_BOOLEAN, DecodedValueKind::BOOL}, + {::parquet::schema::PrimitiveNode::Make("i32", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT32), + TYPE_INT, DecodedValueKind::INT32}, + {::parquet::schema::PrimitiveNode::Make("i64", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT64), + TYPE_BIGINT, DecodedValueKind::INT64}, + {::parquet::schema::PrimitiveNode::Make("f", ::parquet::Repetition::REQUIRED, + ::parquet::Type::FLOAT), + TYPE_FLOAT, DecodedValueKind::FLOAT}, + {::parquet::schema::PrimitiveNode::Make("d", ::parquet::Repetition::REQUIRED, + ::parquet::Type::DOUBLE), + TYPE_DOUBLE, DecodedValueKind::DOUBLE}, + {::parquet::schema::PrimitiveNode::Make("s", ::parquet::Repetition::REQUIRED, + ::parquet::Type::BYTE_ARRAY), + TYPE_STRING, DecodedValueKind::BINARY, true}, + {::parquet::schema::PrimitiveNode::Make("fs", ::parquet::Repetition::REQUIRED, + ::parquet::Type::FIXED_LEN_BYTE_ARRAY, + ::parquet::ConvertedType::NONE, 4), + TYPE_STRING, DecodedValueKind::FIXED_BINARY, true}, + {::parquet::schema::PrimitiveNode::Make("ts", ::parquet::Repetition::REQUIRED, + ::parquet::Type::INT96), + TYPE_DATETIMEV2, DecodedValueKind::INT96}, + }; + + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.expected_type); + const auto type = resolve_node(test_case.node); + ASSERT_NE(type.doris_type, nullptr); + EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type); + EXPECT_EQ(decoded_value_kind(type), test_case.expected_kind); + EXPECT_EQ(type.is_string_like, test_case.expected_string_like); + EXPECT_TRUE(type.supports_record_reader); + } +} + +TEST(ParquetTypeTest, InvalidLogicalAnnotationsFallBackOrRejectAsSpecified) { + EXPECT_THROW(::parquet::LogicalType::Int(24, true), ::parquet::ParquetException); + + const auto nanos_time = resolve_node(::parquet::schema::PrimitiveNode::Make( + "time_ns", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::NANOS), + ::parquet::Type::INT64)); + ASSERT_NE(nanos_time.doris_type, nullptr); + EXPECT_EQ(primitive_type(nanos_time.doris_type), TYPE_BIGINT); + EXPECT_TRUE(nanos_time.unsupported_reason.empty()); + + const auto adjusted_nanos_time = resolve_node(::parquet::schema::PrimitiveNode::Make( + "time_ns_utc", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::NANOS), + ::parquet::Type::INT64)); + EXPECT_EQ(adjusted_nanos_time.doris_type, nullptr); + EXPECT_FALSE(adjusted_nanos_time.supports_record_reader); + EXPECT_FALSE(adjusted_nanos_time.unsupported_reason.empty()); + + EXPECT_THROW(::parquet::schema::PrimitiveNode::Make("f16_bad", ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::Float16(), + ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 4), + ::parquet::ParquetException); +} + +} // namespace doris::format::parquet diff --git a/be/test/format_v2/table/hive_reader_test.cpp b/be/test/format_v2/table/hive_reader_test.cpp new file mode 100644 index 00000000000000..67be16856f53eb --- /dev/null +++ b/be/test/format_v2/table/hive_reader_test.cpp @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/hive_reader.h" + +#include + +#include +#include +#include + +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "format_v2/column_data.h" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/runtime_profile.h" +#include "runtime/runtime_state.h" + +namespace doris::format::hive { +namespace { + +ColumnDefinition table_column(const std::string& name, DataTypePtr type) { + ColumnDefinition column; + column.identifier = Field::create_field(name); + column.name = name; + column.type = std::move(type); + return column; +} + +Status init_hive_reader(FileFormat format, TFileScanRangeParams* params, RuntimeState* state, + RuntimeProfile* profile, HiveReader* reader) { + return reader->init({ + .projected_columns = {table_column("id", std::make_shared()), + table_column("name", std::make_shared())}, + .column_predicates = {}, + .conjuncts = {}, + .format = format, + .scan_params = params, + .io_ctx = nullptr, + .runtime_state = state, + .scanner_profile = profile, + }); +} + +class HiveV2ReaderTest : public testing::Test { +public: + HiveV2ReaderTest() : state(query_options, query_globals), profile("hive_v2_reader_test") {} + +protected: + TQueryOptions query_options; + TQueryGlobals query_globals; + RuntimeState state; + RuntimeProfile profile; +}; + +// Scenario: Hive tables using OpenCSVSerde are planned as table_format=hive with CSV file format. +// HiveReader must allow that file format so TableReader can create the v2 CsvReader. +TEST_F(HiveV2ReaderTest, InitSupportsCsvFileFormat) { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN); + HiveReader reader; + + ASSERT_TRUE(init_hive_reader(FileFormat::CSV, ¶ms, &state, &profile, &reader).ok()); + EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME); +} + +// Scenario: Hive text files also synthesize a file-local schema from FE slots, so they should use +// name mapping at the table-reader layer while TextReader consumes column_idxs for field ordinals. +TEST_F(HiveV2ReaderTest, InitSupportsTextFileFormat) { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_TEXT); + HiveReader reader; + + ASSERT_TRUE(init_hive_reader(FileFormat::TEXT, ¶ms, &state, &profile, &reader).ok()); + EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME); +} + +// Scenario: Hive JSON files also synthesize a file-local schema from FE slots, so they should use +// name mapping at the table-reader layer while JsonReader consumes JSON attributes. +TEST_F(HiveV2ReaderTest, InitSupportsJsonFileFormat) { + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_JSON); + HiveReader reader; + + ASSERT_TRUE(init_hive_reader(FileFormat::JSON, ¶ms, &state, &profile, &reader).ok()); + EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME); +} + +TEST_F(HiveV2ReaderTest, MappingModeUsesInitializedFormat) { + query_options.hive_parquet_use_column_names = false; + query_options.hive_orc_use_column_names = true; + state.set_query_options(query_options); + + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + HiveReader reader; + + ASSERT_TRUE(init_hive_reader(FileFormat::PARQUET, ¶ms, &state, &profile, &reader).ok()); + EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_INDEX); + + SplitReadOptions parquet_split; + parquet_split.current_range.__set_path("split.parquet"); + parquet_split.current_split_format = FileFormat::PARQUET; + ASSERT_TRUE(reader.prepare_split(parquet_split).ok()); + EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_INDEX); + + SplitReadOptions orc_split; + orc_split.current_range.__set_path("split.orc"); + orc_split.current_split_format = FileFormat::ORC; + EXPECT_FALSE(reader.prepare_split(orc_split).ok()); +} + +// Scenario: positional mapping is only for Hive Parquet/ORC sessions that disable name mapping. +// CSV keeps the synthesized file-column names and leaves column_idxs for the CsvReader itself. +TEST_F(HiveV2ReaderTest, CsvDoesNotConsumeColumnIdxsAsPositionalSchemaMapping) { + query_options.hive_parquet_use_column_names = false; + TFileScanRangeParams params; + params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN); + params.__set_column_idxs({3}); + ProjectedColumnBuildContext context { + .scan_params = ¶ms, + .runtime_state = &state, + }; + HiveReader reader; + + TFileScanSlotInfo slot; + slot.__set_is_file_slot(true); + auto column = table_column("value", std::make_shared()); + + ASSERT_TRUE(reader.annotate_projected_column(slot, &context, &column).ok()); + ASSERT_TRUE(column.has_identifier_name()); + EXPECT_EQ(column.get_identifier_name(), "value"); + EXPECT_EQ(context.next_file_column_idx, 0); +} + +} // namespace +} // namespace doris::format::hive diff --git a/be/test/format_v2/table/hudi_reader_test.cpp b/be/test/format_v2/table/hudi_reader_test.cpp new file mode 100644 index 00000000000000..125183cd7a60c4 --- /dev/null +++ b/be/test/format_v2/table/hudi_reader_test.cpp @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/hudi_reader.h" + +#include + +#include +#include +#include +#include +#include + +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "core/field.h" +#include "format_v2/column_data.h" +#include "gen_cpp/ExternalTableSchema_types.h" +#include "gen_cpp/PlanNodes_types.h" + +namespace doris::format { +namespace { + +schema::external::TFieldPtr external_schema_field(std::string name, int32_t id, + std::vector aliases = {}) { + auto field = std::make_shared(); + field->__set_name(std::move(name)); + field->__set_id(id); + if (!aliases.empty()) { + field->__set_name_mapping(std::move(aliases)); + } + schema::external::TFieldPtr field_ptr; + field_ptr.field_ptr = std::move(field); + field_ptr.__isset.field_ptr = true; + return field_ptr; +} + +schema::external::TSchema external_schema(int64_t schema_id, + std::vector fields) { + schema::external::TStructField root_field; + root_field.__set_fields(std::move(fields)); + schema::external::TSchema schema; + schema.__set_schema_id(schema_id); + schema.__set_root_field(std::move(root_field)); + return schema; +} + +ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition field; + field.identifier = Field::create_field(id); + field.local_id = id; + field.name = name; + field.type = type; + return field; +} + +TTableFormatFileDesc hudi_table_format_desc(std::optional schema_id) { + TTableFormatFileDesc table_format_params; + table_format_params.__set_table_format_type("hudi"); + THudiFileDesc hudi_params; + if (schema_id.has_value()) { + hudi_params.__set_schema_id(*schema_id); + } + table_format_params.__set_hudi_params(hudi_params); + return table_format_params; +} + +// Scenario: FileScannerV2 Hudi native reader uses the split schema id to annotate the physical +// file schema before TableColumnMapper runs. This keeps schema-evolved Hudi files on field-id +// mapping, including renamed nested children. +TEST(HudiReaderTest, AnnotatesFileSchemaFromSplitHistorySchema) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + + auto profile_field = external_schema_field("profile", 20); + schema::external::TStructField profile_struct; + profile_struct.__set_fields({external_schema_field("old_age", 21, {"age"})}); + profile_field.field_ptr->nestedField.__set_struct_field(std::move(profile_struct)); + profile_field.field_ptr->__isset.nestedField = true; + + scan_params.__set_history_schema_info({ + external_schema(100, {external_schema_field("old_name", 10, {"name"}), profile_field}), + external_schema( + 200, {external_schema_field("name", 10), external_schema_field("profile", 20)}), + }); + + hudi::HudiReader reader; + reader.TEST_set_scan_params(&scan_params); + + SplitReadOptions split_options; + split_options.current_range.__set_table_format_params(hudi_table_format_desc(100)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID); + + auto string_type = std::make_shared(); + auto int_type = std::make_shared(); + auto profile_type = std::make_shared(DataTypes {int_type}, Strings {"old_age"}); + auto profile_column = make_file_column(1, "profile", profile_type); + profile_column.children = {make_file_column(0, "old_age", int_type)}; + std::vector file_schema { + make_file_column(0, "old_name", string_type), + profile_column, + }; + + ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok()); + ASSERT_EQ(file_schema.size(), 2); + EXPECT_EQ(file_schema[0].get_identifier_field_id(), 10); + EXPECT_EQ(file_schema[0].name_mapping, std::vector({"name"})); + EXPECT_EQ(file_schema[1].get_identifier_field_id(), 20); + ASSERT_EQ(file_schema[1].children.size(), 1); + EXPECT_EQ(file_schema[1].children[0].get_identifier_field_id(), 21); + EXPECT_EQ(file_schema[1].children[0].name_mapping, std::vector({"age"})); +} + +// Scenario: a Hudi split can only use field-id mapping when its schema id resolves to a historical +// schema sent by FE. Unknown or missing split schema ids must fall back to BY_NAME and leave the +// physical file schema untouched. +TEST(HudiReaderTest, FallsBackToByNameWhenSplitHistorySchemaIsMissing) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + scan_params.__set_history_schema_info({ + external_schema(200, {external_schema_field("name", 10)}), + }); + + hudi::HudiReader reader; + reader.TEST_set_scan_params(&scan_params); + + SplitReadOptions split_options; + split_options.current_range.__set_table_format_params(hudi_table_format_desc(100)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME); + + std::vector file_schema { + make_file_column(0, "old_name", std::make_shared()), + }; + ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok()); + EXPECT_EQ(file_schema[0].get_identifier_field_id(), 0); + EXPECT_TRUE(file_schema[0].name_mapping.empty()); +} + +// Scenario: HudiReader must reset the previous split schema id before each split. Otherwise a +// BY_FIELD_ID split could leak its schema id into the next split that carries no schema id. +TEST(HudiReaderTest, ResetsSplitSchemaIdBeforePreparingNextSplit) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + scan_params.__set_history_schema_info({ + external_schema(100, {external_schema_field("old_name", 10, {"name"})}), + external_schema(200, {external_schema_field("name", 10)}), + }); + + hudi::HudiReader reader; + reader.TEST_set_scan_params(&scan_params); + + SplitReadOptions split_with_schema_id; + split_with_schema_id.current_range.__set_table_format_params(hudi_table_format_desc(100)); + ASSERT_TRUE(reader.prepare_split(split_with_schema_id).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID); + + SplitReadOptions split_without_schema_id; + split_without_schema_id.current_range.__set_table_format_params( + hudi_table_format_desc(std::nullopt)); + ASSERT_TRUE(reader.prepare_split(split_without_schema_id).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME); +} + +} // namespace +} // namespace doris::format diff --git a/be/test/format_v2/table/iceberg_reader_test.cpp b/be/test/format_v2/table/iceberg_reader_test.cpp new file mode 100644 index 00000000000000..84fe09bc0c55b5 --- /dev/null +++ b/be/test/format_v2/table/iceberg_reader_test.cpp @@ -0,0 +1,1852 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/iceberg_reader.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_array.h" +#include "core/column/column_const.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "exec/common/endian.h" +#include "exprs/runtime_filter_expr.h" +#include "exprs/vectorized_fn_call.h" +#include "exprs/vexpr.h" +#include "exprs/vliteral.h" +#include "exprs/vslot_ref.h" +#include "format/format_common.h" +#include "format/table/deletion_vector_reader.h" +#include "format_v2/table_reader.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "io/io_common.h" +#include "roaring/roaring64map.hh" +#include "runtime/runtime_profile.h" +#include "runtime/runtime_state.h" +#include "storage/predicate/predicate_creator.h" +#include "storage/segment/condition_cache.h" + +namespace doris::format { +namespace { + +LocalColumnIndex field_projection(int32_t column_id) { + return LocalColumnIndex {.index = column_id}; +} + +std::vector projection_ids(const std::vector& projections) { + std::vector ids; + ids.reserve(projections.size()); + for (const auto& projection : projections) { + ids.push_back(projection.index); + } + return ids; +} +VExprSPtr table_int32_slot_ref(int slot_id, int column_id, const std::string& column_name) { + const auto nullable_int_type = make_nullable(std::make_shared()); + return VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_int_type, column_name); +} + +VExprSPtr table_int32_literal(int32_t value) { + return VLiteral::create_shared(std::make_shared(), + Field::create_field(value)); +} + +VExprSPtr table_int64_literal(int64_t value) { + return VLiteral::create_shared(std::make_shared(), + Field::create_field(value)); +} + +TExprNode table_function_node(const std::string& function_name, const DataTypePtr& return_type, + const std::vector& arg_types, + TExprNodeType::type node_type, + TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE, + bool short_circuit_evaluation = false) { + TFunctionName fn_name; + fn_name.__set_function_name(function_name); + TFunction fn; + fn.__set_name(fn_name); + fn.__set_binary_type(TFunctionBinaryType::BUILTIN); + std::vector thrift_arg_types; + thrift_arg_types.reserve(arg_types.size()); + for (const auto& arg_type : arg_types) { + thrift_arg_types.push_back(arg_type->to_thrift()); + } + fn.__set_arg_types(thrift_arg_types); + fn.__set_ret_type(return_type->to_thrift()); + fn.__set_has_var_args(false); + + TExprNode node; + node.__set_node_type(node_type); + node.__set_opcode(opcode); + node.__set_type(return_type->to_thrift()); + node.__set_fn(fn); + node.__set_num_children(static_cast(arg_types.size())); + node.__set_is_nullable(return_type->is_nullable()); + if (short_circuit_evaluation) { + node.__set_short_circuit_evaluation(true); + } + return node; +} + +VExprSPtr table_function_expr(const std::string& function_name, const DataTypePtr& return_type, + const std::vector& arg_types, + TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL, + TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) { + const auto node = table_function_node(function_name, return_type, arg_types, node_type, opcode); + return VectorizedFnCall::create_shared(node); +} + +VExprSPtr table_int32_greater_than_expr(int slot_id, int column_id, int32_t value) { + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + auto expr = table_function_expr("gt", make_nullable(std::make_shared()), + {nullable_int_type, int_type}, TExprNodeType::BINARY_PRED, + TExprOpcode::GT); + expr->add_child(table_int32_slot_ref(slot_id, column_id, "id")); + expr->add_child(table_int32_literal(value)); + return expr; +} + +VExprSPtr table_nullable_int64_binary_predicate(const std::string& function_name, + TExprOpcode::type opcode, int slot_id, + int column_id, const std::string& column_name, + int64_t value) { + const auto int64_type = std::make_shared(); + const auto nullable_int64_type = make_nullable(int64_type); + auto expr = table_function_expr(function_name, make_nullable(std::make_shared()), + {nullable_int64_type, int64_type}, TExprNodeType::BINARY_PRED, + opcode); + expr->add_child( + VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_int64_type, column_name)); + expr->add_child(table_int64_literal(value)); + return expr; +} + +class IcebergTableReaderDeleteFileTestHelper final + : public doris::format::iceberg::IcebergTableReader { +public: + Status parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc, + bool* has_delete_file) { + return _parse_deletion_vector_file(t_desc, desc, has_delete_file); + } +}; + +class IcebergTableReaderScanRequestTestHelper final + : public doris::format::iceberg::IcebergTableReader { +public: + Status init_for_scan_request_test(std::vector projected_columns) { + _query_options = std::make_unique(); + _query_globals = std::make_unique(); + _state = std::make_unique(*_query_options, *_query_globals); + RETURN_IF_ERROR(init({ + .projected_columns = std::move(projected_columns), + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = _state.get(), + .scanner_profile = nullptr, + })); + + SplitReadOptions split_options; + split_options.current_range.__set_path("scan-request-test.parquet"); + TTableFormatFileDesc table_format_params; + TIcebergFileDesc iceberg_params; + iceberg_params.__set_first_row_id(1000); + table_format_params.__set_iceberg_params(iceberg_params); + split_options.current_range.__set_table_format_params(table_format_params); + RETURN_IF_ERROR(prepare_split(split_options)); + + _delete_rows_storage = {1}; + _delete_rows = &_delete_rows_storage; + return Status::OK(); + } + + Status customize_request(FileScanRequest* request) { + return customize_file_scan_request(request); + } + +private: + std::unique_ptr _query_options; + std::unique_ptr _query_globals; + std::unique_ptr _state; + DeleteRows _delete_rows_storage; +}; + +class IcebergTableReaderMappingModeTestHelper final + : public doris::format::iceberg::IcebergTableReader { +public: + TableColumnMappingMode mapping_mode_for_schema(std::vector file_schema) { + _data_reader.file_schema = std::move(file_schema); + return mapping_mode(); + } +}; + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +std::shared_ptr build_int32_array(const std::vector& values) { + arrow::Int32Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_int64_array(const std::vector& values) { + arrow::Int64Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_nullable_int64_array( + const std::vector>& values) { + arrow::Int64Builder builder; + for (const auto& value : values) { + if (value.has_value()) { + EXPECT_TRUE(builder.Append(*value).ok()); + } else { + EXPECT_TRUE(builder.AppendNull().ok()); + } + } + return finish_array(&builder); +} + +std::shared_ptr build_string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +void write_iceberg_equality_delete_parquet_file(const std::string& file_path, int32_t field_id, + int32_t value) { + const auto metadata = + arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)}); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false)->WithMetadata(metadata), + }); + auto table = arrow::Table::Make(schema, {build_int32_array({value})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1, + builder.build())); +} + +void write_iceberg_equality_delete_bigint_parquet_file(const std::string& file_path, + int32_t field_id, int64_t value) { + const auto metadata = + arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)}); + auto schema = arrow::schema({ + arrow::field("id", arrow::int64(), false)->WithMetadata(metadata), + }); + auto table = arrow::Table::Make(schema, {build_int64_array({value})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1, + builder.build())); +} + +void write_int_pair_parquet_file(const std::string& file_path, const std::vector& ids, + const std::vector& scores, + const std::vector& values, + int64_t row_group_size = -1) { + const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"}); + const auto score_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"1"}); + const auto value_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2"}); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata), + arrow::field("score", arrow::int32(), false)->WithMetadata(score_metadata), + arrow::field("value", arrow::utf8(), false)->WithMetadata(value_metadata), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores), + build_string_array(values)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + const auto write_row_group_size = + row_group_size > 0 ? row_group_size : static_cast(ids.size()); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + write_row_group_size, builder.build())); +} + +void write_iceberg_row_lineage_parquet_file( + const std::string& file_path, const std::vector& ids, + const std::vector>& row_ids, + const std::vector>& last_updated_sequence_numbers = {}) { + ASSERT_EQ(ids.size(), row_ids.size()); + if (!last_updated_sequence_numbers.empty()) { + ASSERT_EQ(ids.size(), last_updated_sequence_numbers.size()); + } + const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"}); + const auto row_id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2147483540"}); + const auto last_updated_sequence_number_metadata = + arrow::key_value_metadata({"PARQUET:field_id"}, {"2147483539"}); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata), + arrow::field("_row_id", arrow::int64(), true)->WithMetadata(row_id_metadata), + }); + std::vector> arrays = { + build_int32_array(ids), + build_nullable_int64_array(row_ids), + }; + if (!last_updated_sequence_numbers.empty()) { + schema = + schema->AddField(schema->num_fields(), + arrow::field("_last_updated_sequence_number", arrow::int64(), true) + ->WithMetadata(last_updated_sequence_number_metadata)) + .ValueOrDie(); + arrays.push_back(build_nullable_int64_array(last_updated_sequence_numbers)); + } + auto table = arrow::Table::Make(schema, arrays); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + static_cast(ids.size()), + builder.build())); +} + +void write_position_delete_parquet_file(const std::string& file_path, + const std::vector& data_file_paths, + const std::vector& positions) { + auto schema = arrow::schema({ + arrow::field("file_path", arrow::utf8(), false), + arrow::field("pos", arrow::int64(), false), + }); + auto table = arrow::Table::Make( + schema, {build_string_array(data_file_paths), build_int64_array(positions)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + static_cast(positions.size()), + builder.build())); +} + +int64_t write_iceberg_deletion_vector_file(const std::string& file_path, + const std::vector& deleted_positions) { + roaring::Roaring64Map rows; + for (const auto position : deleted_positions) { + rows.add(position); + } + + const size_t bitmap_size = rows.getSizeInBytes(); + std::vector blob(4 + 4 + bitmap_size + 4); + rows.write(blob.data() + 8); + + const uint32_t total_length = static_cast(4 + bitmap_size); + BigEndian::Store32(blob.data(), total_length); + constexpr char DV_MAGIC[] = {'\xD1', '\xD3', '\x39', '\x64'}; + memcpy(blob.data() + 4, DV_MAGIC, 4); + BigEndian::Store32(blob.data() + 8 + bitmap_size, 0); + + std::ofstream output(file_path, std::ios::binary); + EXPECT_TRUE(output.is_open()); + output.write(blob.data(), static_cast(blob.size())); + EXPECT_TRUE(output.good()); + return static_cast(blob.size()); +} + +Block build_table_block(const std::vector& columns) { + Block block; + for (const auto& column : columns) { + block.insert({column.type->create_column(), column.type, column.name}); + } + return block; +} + +void expect_nullable_int64_column_values(const IColumn& column, + const std::vector& expected_values) { + const auto full_column = column.convert_to_full_column_if_const(); + const auto& nullable_column = assert_cast(*full_column); + const auto& values = + assert_cast(nullable_column.get_nested_column()).get_data(); + ASSERT_EQ(nullable_column.size(), expected_values.size()); + for (size_t row = 0; row < expected_values.size(); ++row) { + EXPECT_EQ(nullable_column.get_null_map_data()[row], 0); + EXPECT_EQ(values[row], expected_values[row]); + } +} + +void expect_nullable_int64_column_optional_values( + const IColumn& column, const std::vector>& expected_values) { + const auto full_column = column.convert_to_full_column_if_const(); + const auto& nullable_column = assert_cast(*full_column); + const auto& values = + assert_cast(nullable_column.get_nested_column()).get_data(); + ASSERT_EQ(nullable_column.size(), expected_values.size()); + for (size_t row = 0; row < expected_values.size(); ++row) { + if (expected_values[row].has_value()) { + EXPECT_EQ(nullable_column.get_null_map_data()[row], 0); + EXPECT_EQ(values[row], *expected_values[row]); + } else { + EXPECT_EQ(nullable_column.get_null_map_data()[row], 1); + } + } +} + +const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) { + if (!column.is_nullable()) { + return column; + } + const auto& nullable_column = assert_cast(column); + for (const auto is_null : nullable_column.get_null_map_data()) { + EXPECT_EQ(is_null, 0); + } + return nullable_column.get_nested_column(); +} + +const IColumn& expect_not_null_table_column(const Block& block, size_t position) { + return expect_not_null_nullable_nested_column(*block.get_by_position(position).column); +} + +ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type); + +DataTypePtr make_iceberg_rowid_type() { + return make_nullable(std::make_shared( + DataTypes {std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared()}, + Strings {"file_path", "row_pos", "partition_spec_id", "partition_data_json"})); +} + +ColumnDefinition make_iceberg_row_lineage_row_id_column() { + return make_table_column(2147483540, "_row_id", + make_nullable(std::make_shared())); +} + +ColumnDefinition make_iceberg_last_updated_sequence_number_column() { + return make_table_column(2147483539, "_last_updated_sequence_number", + make_nullable(std::make_shared())); +} + +void expect_iceberg_rowid_column_values(const IColumn& column, const std::string& file_path, + const std::vector& row_positions, + int32_t partition_spec_id, + const std::string& partition_data_json) { + const auto full_column = column.convert_to_full_column_if_const(); + const auto& nullable_column = assert_cast(*full_column); + const auto& struct_column = + assert_cast(nullable_column.get_nested_column()); + const auto& file_path_column = assert_cast( + expect_not_null_nullable_nested_column(struct_column.get_column(0))); + const auto& row_pos_column = assert_cast( + expect_not_null_nullable_nested_column(struct_column.get_column(1))); + const auto& spec_id_column = assert_cast( + expect_not_null_nullable_nested_column(struct_column.get_column(2))); + const auto& partition_data_column = assert_cast( + expect_not_null_nullable_nested_column(struct_column.get_column(3))); + + ASSERT_EQ(nullable_column.size(), row_positions.size()); + for (size_t row = 0; row < row_positions.size(); ++row) { + EXPECT_EQ(nullable_column.get_null_map_data()[row], 0); + EXPECT_EQ(file_path_column.get_data_at(row).to_string(), file_path); + EXPECT_EQ(row_pos_column.get_element(row), row_positions[row]); + EXPECT_EQ(spec_id_column.get_element(row), partition_spec_id); + EXPECT_EQ(partition_data_column.get_data_at(row).to_string(), partition_data_json); + } +} + +void expect_int32_column_values(const IColumn& column, + const std::vector& expected_values) { + const auto full_column = column.convert_to_full_column_if_const(); + const auto& nested_column = expect_not_null_nullable_nested_column(*full_column); + const auto& values = assert_cast(nested_column).get_data(); + ASSERT_EQ(values.size(), expected_values.size()); + for (size_t row = 0; row < expected_values.size(); ++row) { + EXPECT_EQ(values[row], expected_values[row]); + } +} + +SplitReadOptions build_split_options(const std::string& file_path) { + SplitReadOptions options; + options.current_range.__set_path(file_path); + options.current_range.__set_file_size( + static_cast(std::filesystem::file_size(file_path))); + return options; +} + +void set_table_level_row_count(SplitReadOptions* split_options, int64_t row_count) { + split_options->current_range.__isset.table_format_params = true; + split_options->current_range.table_format_params.__isset.table_level_row_count = true; + split_options->current_range.table_format_params.table_level_row_count = row_count; +} + +void set_iceberg_row_lineage_params(SplitReadOptions* split_options, int64_t first_row_id, + int64_t last_updated_sequence_number) { + TTableFormatFileDesc table_format_params; + TIcebergFileDesc iceberg_params; + iceberg_params.__set_first_row_id(first_row_id); + iceberg_params.__set_last_updated_sequence_number(last_updated_sequence_number); + table_format_params.__set_iceberg_params(iceberg_params); + split_options->current_range.__set_table_format_params(table_format_params); +} + +void set_iceberg_rowid_params(SplitReadOptions* split_options, + const std::string& original_file_path, int32_t partition_spec_id, + const std::string& partition_data_json) { + TTableFormatFileDesc table_format_params; + TIcebergFileDesc iceberg_params; + iceberg_params.__set_original_file_path(original_file_path); + iceberg_params.__set_partition_spec_id(partition_spec_id); + iceberg_params.__set_partition_data_json(partition_data_json); + table_format_params.__set_iceberg_params(iceberg_params); + split_options->current_range.__set_table_format_params(table_format_params); +} + +TIcebergDeleteFileDesc make_iceberg_deletion_vector(const std::string& path, int64_t offset, + int64_t size) { + TIcebergDeleteFileDesc delete_file; + delete_file.__set_content(3); + delete_file.__set_path(path); + delete_file.__set_content_offset(offset); + delete_file.__set_content_size_in_bytes(size); + return delete_file; +} + +TIcebergDeleteFileDesc make_iceberg_position_delete_file(const std::string& path) { + TIcebergDeleteFileDesc delete_file; + delete_file.__set_content(1); + delete_file.__set_path(path); + delete_file.__set_file_format(TFileFormatType::FORMAT_PARQUET); + return delete_file; +} + +TIcebergDeleteFileDesc make_iceberg_equality_delete_file(const std::string& path, + const std::vector& field_ids) { + TIcebergDeleteFileDesc delete_file; + delete_file.__set_content(2); + delete_file.__set_path(path); + delete_file.__set_field_ids(field_ids); + delete_file.__set_file_format(TFileFormatType::FORMAT_PARQUET); + return delete_file; +} + +TFileScanRangeParams make_local_parquet_scan_params() { + TFileScanRangeParams scan_params; + scan_params.__set_file_type(TFileType::FILE_LOCAL); + scan_params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + return scan_params; +} + +std::shared_ptr make_io_context(io::FileReaderStats* file_reader_stats, + io::FileCacheStatistics* file_cache_stats) { + auto io_ctx = std::make_shared(); + io_ctx->file_reader_stats = file_reader_stats; + io_ctx->file_cache_stats = file_cache_stats; + return io_ctx; +} + +TTableFormatFileDesc make_iceberg_table_format_desc( + const std::string& data_file_path, + const std::vector& delete_files) { + TTableFormatFileDesc table_format_params; + TIcebergFileDesc iceberg_params; + iceberg_params.__set_format_version(2); + iceberg_params.__set_original_file_path(data_file_path); + iceberg_params.__set_delete_files(delete_files); + table_format_params.__set_iceberg_params(iceberg_params); + return table_format_params; +} + +std::vector read_iceberg_ids(doris::format::iceberg::IcebergTableReader* reader, + const std::vector& projected_columns) { + std::vector ids; + bool eos = false; + while (!eos) { + Block block = build_table_block(projected_columns); + auto status = reader->get_block(&block, &eos); + if (!status.ok()) { + ADD_FAILURE() << status; + return ids; + } + if (block.rows() == 0) { + continue; + } + const auto& id_column = + assert_cast(expect_not_null_table_column(block, 0)); + for (size_t row = 0; row < block.rows(); ++row) { + ids.push_back(id_column.get_element(row)); + } + } + return ids; +} + +DataTypePtr make_table_test_type(const DataTypePtr& type, bool nullable_root = true) { + DORIS_CHECK(type != nullptr); + const auto nested_type = remove_nullable(type); + DataTypePtr result; + if (const auto* struct_type = typeid_cast(nested_type.get())) { + DataTypes child_types; + child_types.reserve(struct_type->get_elements().size()); + for (const auto& child_type : struct_type->get_elements()) { + child_types.push_back(make_table_test_type(child_type)); + } + result = std::make_shared(child_types, struct_type->get_element_names()); + } else if (const auto* array_type = typeid_cast(nested_type.get())) { + result = std::make_shared( + make_table_test_type(array_type->get_nested_type())); + } else if (const auto* map_type = typeid_cast(nested_type.get())) { + result = std::make_shared(make_table_test_type(map_type->get_key_type()), + make_table_test_type(map_type->get_value_type())); + } else { + result = nested_type; + } + return nullable_root ? make_nullable(result) : result; +} + +ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition column; + if (id >= 0) { + column.identifier = Field::create_field(id); + } + column.name = name; + // TableReader tests model external table scan descriptors. Those table columns are nullable + // even when the Parquet file field itself is required, so keep the test schema aligned with + // the real scan contract at the construction boundary. + column.type = make_table_test_type(type); + return column; +} + +ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition field; + field.identifier = Field::create_field(id); + field.local_id = id; + field.name = name; + field.type = make_table_test_type(type); + return field; +} + +void set_name_identifiers(std::vector* columns); + +void set_name_identifier(ColumnDefinition* column) { + DORIS_CHECK(column != nullptr); + column->identifier = Field::create_field(column->name); + set_name_identifiers(&column->children); +} + +void set_name_identifiers(std::vector* columns) { + DORIS_CHECK(columns != nullptr); + for (auto& column : *columns) { + set_name_identifier(&column); + } +} + +void add_column_predicate(TableColumnPredicates* column_predicates, GlobalIndex global_index, + std::shared_ptr predicate) { + auto& entry = (*column_predicates)[global_index]; + entry.push_back(std::move(predicate)); +} + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto ctx = VExprContext::create_shared(expr); + auto status = ctx->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = ctx->open(state); + EXPECT_TRUE(status.ok()) << status; + return ctx; +} + +void apply_final_conjuncts(Block* block, const VExprContextSPtrs& conjuncts) { + const auto status = VExprContext::filter_block(conjuncts, block, block->columns()); + ASSERT_TRUE(status.ok()) << status; +} + +TEST(IcebergV2ReaderTest, IcebergVirtualColumnsUseRowLineageMetadata) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_virtual_columns_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back(make_iceberg_row_lineage_row_id_column()); + projected_columns.push_back(make_iceberg_last_updated_sequence_number_column()); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(2, 2, 1))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + set_iceberg_row_lineage_params(&split_options, 1000, 77); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& id_column = assert_cast(expect_not_null_table_column(block, 2)); + + ASSERT_EQ(block.rows(), 2); + EXPECT_EQ(id_column.get_element(0), 2); + EXPECT_EQ(id_column.get_element(1), 3); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001, 1002}); + expect_nullable_int64_column_values(*block.get_by_position(1).column, {77, 77}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergRowLineageUsesPhysicalRowIdAndFillsNulls) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_physical_row_id_fill_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002}, + {80, std::nullopt, 82}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column( + 2147483540, "_row_id", make_nullable(std::make_shared()))); + projected_columns.push_back( + make_table_column(2147483539, "_last_updated_sequence_number", + make_nullable(std::make_shared()))); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + set_iceberg_row_lineage_params(&split_options, 1000, 77); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + ASSERT_EQ(block.rows(), 3); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {7000, 1001, 7002}); + expect_nullable_int64_column_values(*block.get_by_position(1).column, {80, 77, 82}); + expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergPhysicalRowIdKeepsNullsWithoutFirstRowId) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_physical_row_id_no_first_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002}, + {80, std::nullopt, 82}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column( + 2147483540, "_row_id", make_nullable(std::make_shared()))); + projected_columns.push_back( + make_table_column(2147483539, "_last_updated_sequence_number", + make_nullable(std::make_shared()))); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + ASSERT_EQ(block.rows(), 3); + expect_nullable_int64_column_optional_values( + *block.get_by_position(0).column, + std::vector> {7000, std::nullopt, 7002}); + expect_nullable_int64_column_optional_values( + *block.get_by_position(1).column, + std::vector> {80, std::nullopt, 82}); + expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergMissingRowIdStaysNullWithoutFirstRowId) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_missing_row_id_no_first_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back(make_iceberg_row_lineage_row_id_column()); + projected_columns.push_back(make_iceberg_last_updated_sequence_number_column()); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + ASSERT_EQ(block.rows(), 3); + expect_nullable_int64_column_optional_values( + *block.get_by_position(0).column, + std::vector> {std::nullopt, std::nullopt, std::nullopt}); + expect_nullable_int64_column_optional_values( + *block.get_by_position(1).column, + std::vector> {std::nullopt, std::nullopt, std::nullopt}); + expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergRowIdPredicateFiltersAfterRowLineageMaterialization) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_row_id_finalize_filter_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002}, + {80, std::nullopt, 82}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column( + 2147483540, "_row_id", make_nullable(std::make_shared()))); + projected_columns.push_back( + make_table_column(2147483539, "_last_updated_sequence_number", + make_nullable(std::make_shared()))); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + VExprContextSPtrs conjuncts = {prepared_conjunct( + &state, + table_nullable_int64_binary_predicate("eq", TExprOpcode::EQ, 0, 0, "_row_id", 1001))}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = conjuncts, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + set_iceberg_row_lineage_params(&split_options, 1000, 77); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + + apply_final_conjuncts(&block, conjuncts); + ASSERT_EQ(block.rows(), 1); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001}); + expect_nullable_int64_column_values(*block.get_by_position(1).column, {77}); + expect_int32_column_values(*block.get_by_position(2).column, {2}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergLastUpdatedSequencePredicateFiltersAfterMaterialization) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_sequence_finalize_filter_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002}, + {80, std::nullopt, 82}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column( + 2147483540, "_row_id", make_nullable(std::make_shared()))); + projected_columns.push_back( + make_table_column(2147483539, "_last_updated_sequence_number", + make_nullable(std::make_shared()))); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + VExprContextSPtrs conjuncts = {prepared_conjunct( + &state, table_nullable_int64_binary_predicate("eq", TExprOpcode::EQ, 1, 1, + "_last_updated_sequence_number", 77))}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = conjuncts, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + set_iceberg_row_lineage_params(&split_options, 1000, 77); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + + apply_final_conjuncts(&block, conjuncts); + ASSERT_EQ(block.rows(), 1); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001}); + expect_nullable_int64_column_values(*block.get_by_position(1).column, {77}); + expect_int32_column_values(*block.get_by_position(2).column, {2}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergRowidVirtualColumnUsesDataFilePosition) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_rowid_virtual_column_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back( + make_table_column(-1, BeConsts::ICEBERG_ROWID_COL, make_iceberg_rowid_type())); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(1, 1, 1))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + const auto original_file_path = "s3://bucket/table/data/original.parquet"; + const auto partition_data_json = R"({"part":"p1"})"; + set_iceberg_rowid_params(&split_options, original_file_path, 17, partition_data_json); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + ASSERT_EQ(block.rows(), 2); + expect_iceberg_rowid_column_values(*block.get_by_position(0).column, original_file_path, {1, 2}, + 17, partition_data_json); + expect_int32_column_values(*block.get_by_position(1).column, {2, 3}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergVirtualColumnsKeepRowLineageAfterConjunctFiltering) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_virtual_columns_conjunct_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back(make_iceberg_row_lineage_row_id_column()); + projected_columns.push_back(make_iceberg_last_updated_sequence_number_column()); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(2, 2, 1))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + set_iceberg_row_lineage_params(&split_options, 3000, 88); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& id_column = assert_cast(expect_not_null_table_column(block, 2)); + + ASSERT_EQ(block.rows(), 2); + EXPECT_EQ(id_column.get_element(0), 2); + EXPECT_EQ(id_column.get_element(1), 3); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {3001, 3002}); + expect_nullable_int64_column_values(*block.get_by_position(1).column, {88, 88}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergVirtualColumnsKeepRowLineageAfterRowGroupPredicatePruning) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_iceberg_virtual_columns_row_group_predicate_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + // ColumnPredicate is used for row-group/statistics pruning. Keep one row per row group so + // id > 2 prunes the first two row groups and leaves only the third file-local row. + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1); + + std::vector projected_columns; + projected_columns.push_back(make_iceberg_row_lineage_row_id_column()); + projected_columns.push_back(make_iceberg_last_updated_sequence_number_column()); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + TableColumnPredicates column_predicates; + add_column_predicate(&column_predicates, GlobalIndex(2), + create_comparison_predicate( + 0, "id", make_nullable(std::make_shared()), + Field::create_field(2), false)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = std::move(column_predicates), + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + set_iceberg_row_lineage_params(&split_options, 4000, 99); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& id_column = assert_cast(expect_not_null_table_column(block, 2)); + + ASSERT_EQ(block.rows(), 1); + EXPECT_EQ(id_column.get_element(0), 3); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {4002}); + expect_nullable_int64_column_values(*block.get_by_position(1).column, {99}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergDeletionVectorUsesTableReaderDeleteFileInterface) { + TTableFormatFileDesc table_format_desc; + TIcebergFileDesc iceberg_desc; + iceberg_desc.__set_format_version(2); + iceberg_desc.__set_delete_files({make_iceberg_deletion_vector("dv.bin", 8, 128)}); + table_format_desc.__set_iceberg_params(iceberg_desc); + + IcebergTableReaderDeleteFileTestHelper reader; + DeleteFileDesc desc; + bool has_delete_file = false; + ASSERT_TRUE(reader.parse_deletion_vector_file(table_format_desc, &desc, &has_delete_file).ok()); + + EXPECT_TRUE(has_delete_file); + EXPECT_EQ(desc.path, "dv.bin"); + EXPECT_EQ(desc.start_offset, 8); + EXPECT_EQ(desc.size, 128); + EXPECT_EQ(desc.file_size, -1); + EXPECT_EQ(desc.format, DeleteFileDesc::Format::ICEBERG); +} + +TEST(IcebergV2ReaderTest, IcebergDeletionVectorRejectsMultipleDeleteFiles) { + TTableFormatFileDesc table_format_desc; + TIcebergFileDesc iceberg_desc; + iceberg_desc.__set_format_version(2); + iceberg_desc.__set_delete_files({make_iceberg_deletion_vector("dv-a.bin", 8, 128), + make_iceberg_deletion_vector("dv-b.bin", 16, 256)}); + table_format_desc.__set_iceberg_params(iceberg_desc); + + IcebergTableReaderDeleteFileTestHelper reader; + DeleteFileDesc desc; + bool has_delete_file = false; + auto status = reader.parse_deletion_vector_file(table_format_desc, &desc, &has_delete_file); + + EXPECT_FALSE(status.ok()); +} + +TEST(IcebergV2ReaderTest, IcebergTableReaderAppliesDeletionVectorFile) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_deletion_vector_file_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto dv_path = (test_dir / "delete-vector.bin").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50}, + {"one", "two", "three", "four", "five"}); + const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0, 4}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size)})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector({2, 3, 4})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithDeletes) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_delete_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto dv_path = (test_dir / "delete-vector.bin").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size)})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 2); + EXPECT_EQ(id_column.get_element(1), 3); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +// Covers TopN lazy materialization on Iceberg schema-evolution tables. The first-phase scan adds a +// synthesized GLOBAL_ROWID column to the file schema. That virtual column must not make Iceberg +// fall back from field-id mapping to name mapping, otherwise renamed columns are read as defaults +// from old files. +TEST(IcebergV2ReaderTest, IcebergMappingModeIgnoresGlobalRowIdVirtualColumn) { + IcebergTableReaderMappingModeTestHelper reader; + std::vector file_schema { + make_file_column(1, "id", std::make_shared()), + make_file_column(2, "name", std::make_shared()), + global_rowid_column_definition(), + }; + + EXPECT_EQ(reader.mapping_mode_for_schema(std::move(file_schema)), + TableColumnMappingMode::BY_FIELD_ID); +} + +// Covers the fallback side of the previous case. Only synthesized columns are ignored; a real data +// column without an Iceberg field id still disables field-id mapping. +TEST(IcebergV2ReaderTest, IcebergMappingModeRequiresFieldIdsForDataColumns) { + IcebergTableReaderMappingModeTestHelper reader; + std::vector file_schema { + make_file_column(1, "id", std::make_shared()), + make_file_column(2, "name", std::make_shared()), + global_rowid_column_definition(), + }; + file_schema[1].identifier = Field {}; + + EXPECT_EQ(reader.mapping_mode_for_schema(std::move(file_schema)), + TableColumnMappingMode::BY_NAME); +} + +TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithPositionDelete) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_position_delete_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_position_delete_parquet_file(delete_file_path, {file_path}, {1}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_position_delete_file(delete_file_path)})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 3); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergTableLevelCountUsesAssignedRowCountWithPositionDelete) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_table_level_count_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_position_delete_parquet_file(delete_file_path, {file_path}, {1}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + TQueryOptions query_options; + query_options.__set_batch_size(10); + RuntimeState state {query_options, TQueryGlobals()}; + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_position_delete_file(delete_file_path)})); + set_table_level_row_count(&split_options, 5); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + EXPECT_EQ(block.rows(), 5); + + block = build_table_block(projected_columns); + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); + EXPECT_EQ(block.rows(), 0); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergPositionDeleteFallsBackToSplitPath) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_position_delete_path_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_position_delete_parquet_file(delete_file_path, {file_path}, {1}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + TTableFormatFileDesc table_format_params; + TIcebergFileDesc iceberg_params; + iceberg_params.__set_format_version(2); + iceberg_params.__set_delete_files({make_iceberg_position_delete_file(delete_file_path)}); + table_format_params.__set_iceberg_params(iceberg_params); + split_options.current_range.__set_table_format_params(table_format_params); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector({1, 3})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithEqualityDelete) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_equality_delete_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "equality-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_iceberg_equality_delete_parquet_file(delete_file_path, 0, 2); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_equality_delete_file(delete_file_path, {0})})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 3); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergEqualityDeleteCastsDataColumnToDeleteKeyType) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_equality_delete_cast_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "equality-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_iceberg_equality_delete_bigint_parquet_file(delete_file_path, 0, 2); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_equality_delete_file(delete_file_path, {0})})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector({1, 3})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergPositionDeleteOnlyMatchesOriginalDataFilePath) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_iceberg_position_delete_path_match_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto other_file_path = (test_dir / "other.parquet").string(); + const auto delete_file_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_position_delete_parquet_file(delete_file_path, {other_file_path, file_path}, {0, 1}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_position_delete_file(delete_file_path)})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector({1, 3})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergRowLineageRemainsFileLocalAfterDeleteFiltering) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_row_lineage_delete_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + write_position_delete_parquet_file(delete_file_path, {file_path}, {1}); + + std::vector projected_columns; + projected_columns.push_back(make_iceberg_row_lineage_row_id_column()); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + TTableFormatFileDesc table_format_params = make_iceberg_table_format_desc( + file_path, {make_iceberg_position_delete_file(delete_file_path)}); + table_format_params.iceberg_params.__set_first_row_id(1000); + split_options.current_range.__set_table_format_params(table_format_params); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + expect_nullable_int64_column_values(*block.get_by_position(0).column, {1000, 1002}); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 1)); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 3); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergTableReaderAppliesPositionDeleteFile) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_position_delete_file_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto delete_file_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50}, + {"one", "two", "three", "four", "five"}); + write_position_delete_parquet_file(delete_file_path, {file_path, file_path}, {1, 3}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_position_delete_file(delete_file_path)})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector({1, 3, 5})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, IcebergTableReaderMergesDeletionVectorAndPositionDeleteFiles) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_iceberg_delete_files_merge_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto dv_path = (test_dir / "delete-vector.bin").string(); + const auto position_delete_path = (test_dir / "position-delete.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50}, + {"one", "two", "three", "four", "five"}); + const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0}); + write_position_delete_parquet_file(position_delete_path, {file_path, file_path}, {3, 3}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + doris::format::iceberg::IcebergTableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc( + file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size), + make_iceberg_position_delete_file(position_delete_path)})); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector({2, 3, 5})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(IcebergV2ReaderTest, RowPositionDeletePredicateColumnIsNotRepeatedAsOutputColumn) { + const auto row_position_column_id = ROW_POSITION_COLUMN_ID; + std::vector projected_columns; + projected_columns.push_back(make_iceberg_row_lineage_row_id_column()); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + IcebergTableReaderScanRequestTestHelper reader; + ASSERT_TRUE(reader.init_for_scan_request_test(projected_columns).ok()); + + FileScanRequest request; + request.non_predicate_columns.push_back(field_projection(0)); + request.local_positions.emplace(LocalColumnId(0), LocalIndex(0)); + + ASSERT_TRUE(reader.customize_request(&request).ok()); + + EXPECT_EQ(projection_ids(request.predicate_columns), + std::vector({row_position_column_id})); + EXPECT_EQ(projection_ids(request.non_predicate_columns), std::vector({0})); + ASSERT_TRUE(request.local_positions.contains(LocalColumnId(row_position_column_id))); + EXPECT_EQ(request.local_positions.at(LocalColumnId(row_position_column_id)).value(), 1); + ASSERT_TRUE(request.conjuncts.empty()); + ASSERT_EQ(request.delete_conjuncts.size(), 1); + EXPECT_NE(request.delete_conjuncts[0], nullptr); +} + +} // namespace +} // namespace doris::format diff --git a/be/test/format_v2/table/paimon_reader_test.cpp b/be/test/format_v2/table/paimon_reader_test.cpp new file mode 100644 index 00000000000000..fce0244c1738bd --- /dev/null +++ b/be/test/format_v2/table/paimon_reader_test.cpp @@ -0,0 +1,539 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/paimon_reader.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/field.h" +#include "exec/common/endian.h" +#include "format/format_common.h" +#include "format_v2/column_data.h" +#include "gen_cpp/ExternalTableSchema_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "io/io_common.h" +#include "roaring/roaring.hh" +#include "runtime/runtime_profile.h" +#include "runtime/runtime_state.h" + +namespace doris::format { +namespace { + +DataTypePtr table_type(const DataTypePtr& type) { + return type->is_nullable() ? type : make_nullable(type); +} + +ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition column; + column.identifier = Field::create_field(id); + column.name = name; + column.type = table_type(type); + return column; +} + +ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition column; + column.identifier = Field::create_field(id); + column.local_id = id; + column.name = name; + column.type = type; + return column; +} + +schema::external::TFieldPtr external_schema_field(std::string name, int32_t id, + std::vector aliases = {}) { + auto field = std::make_shared(); + field->__set_name(std::move(name)); + field->__set_id(id); + if (!aliases.empty()) { + field->__set_name_mapping(std::move(aliases)); + } + schema::external::TFieldPtr field_ptr; + field_ptr.field_ptr = std::move(field); + field_ptr.__isset.field_ptr = true; + return field_ptr; +} + +schema::external::TFieldPtr external_array_field(std::string name, int32_t id, + schema::external::TFieldPtr item_field, + std::vector aliases = {}) { + auto field = external_schema_field(std::move(name), id, std::move(aliases)); + schema::external::TArrayField array_field; + array_field.__set_item_field(std::move(item_field)); + field.field_ptr->nestedField.__set_array_field(std::move(array_field)); + field.field_ptr->__isset.nestedField = true; + return field; +} + +schema::external::TFieldPtr external_map_field(std::string name, int32_t id, + schema::external::TFieldPtr key_field, + schema::external::TFieldPtr value_field, + std::vector aliases = {}) { + auto field = external_schema_field(std::move(name), id, std::move(aliases)); + schema::external::TMapField map_field; + map_field.__set_key_field(std::move(key_field)); + map_field.__set_value_field(std::move(value_field)); + field.field_ptr->nestedField.__set_map_field(std::move(map_field)); + field.field_ptr->__isset.nestedField = true; + return field; +} + +schema::external::TSchema external_schema(int64_t schema_id, + std::vector fields) { + schema::external::TStructField root_field; + root_field.__set_fields(std::move(fields)); + schema::external::TSchema schema; + schema.__set_schema_id(schema_id); + schema.__set_root_field(std::move(root_field)); + return schema; +} + +Block build_table_block(const std::vector& columns) { + Block block; + for (const auto& column : columns) { + block.insert({column.type->create_column(), column.type, column.name}); + } + return block; +} + +const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) { + if (!column.is_nullable()) { + return column; + } + const auto& nullable_column = assert_cast(column); + for (const auto is_null : nullable_column.get_null_map_data()) { + EXPECT_EQ(is_null, 0); + } + return nullable_column.get_nested_column(); +} + +const IColumn& expect_not_null_table_column(const Block& block, size_t position) { + return expect_not_null_nullable_nested_column(*block.get_by_position(position).column); +} + +std::shared_ptr build_int32_array(const std::vector& values) { + arrow::Int32Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + std::shared_ptr array; + EXPECT_TRUE(builder.Finish(&array).ok()); + return array; +} + +std::shared_ptr build_string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + std::shared_ptr array; + EXPECT_TRUE(builder.Finish(&array).ok()); + return array; +} + +void write_int_pair_parquet_file(const std::string& file_path, const std::vector& ids, + const std::vector& scores, + const std::vector& values) { + ASSERT_EQ(ids.size(), scores.size()); + ASSERT_EQ(ids.size(), values.size()); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("score", arrow::int32(), false), + arrow::field("value", arrow::utf8(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores), + build_string_array(values)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + static_cast(ids.size()), + builder.build())); +} + +int64_t write_paimon_deletion_vector_file(const std::string& file_path, + const std::vector& deleted_positions) { + roaring::Roaring rows; + for (const auto position : deleted_positions) { + rows.add(position); + } + + const size_t bitmap_size = rows.getSizeInBytes(); + const uint32_t total_length = static_cast(4 + bitmap_size); + std::vector blob(4 + total_length); + BigEndian::Store32(blob.data(), total_length); + constexpr char PAIMON_BITMAP_MAGIC[] = {'\x5E', '\x43', '\xF2', '\xD0'}; + memcpy(blob.data() + 4, PAIMON_BITMAP_MAGIC, 4); + rows.write(blob.data() + 8); + + std::ofstream output(file_path, std::ios::binary); + EXPECT_TRUE(output.is_open()); + output.write(blob.data(), static_cast(blob.size())); + EXPECT_TRUE(output.good()); + // Paimon DeletionFile.length is magic + bitmap length, excluding the leading length field. + return static_cast(total_length); +} + +TFileScanRangeParams make_local_parquet_scan_params() { + TFileScanRangeParams scan_params; + scan_params.__set_file_type(TFileType::FILE_LOCAL); + scan_params.__set_format_type(TFileFormatType::FORMAT_PARQUET); + return scan_params; +} + +std::shared_ptr make_io_context(io::FileReaderStats* file_reader_stats, + io::FileCacheStatistics* file_cache_stats) { + auto io_ctx = std::make_shared(); + io_ctx->file_reader_stats = file_reader_stats; + io_ctx->file_cache_stats = file_cache_stats; + return io_ctx; +} + +SplitReadOptions build_split_options(const std::string& file_path) { + SplitReadOptions options; + options.current_range.__set_path(file_path); + options.current_range.__set_file_size( + static_cast(std::filesystem::file_size(file_path))); + return options; +} + +TTableFormatFileDesc make_paimon_table_format_desc(const std::string& deletion_file_path, + int64_t offset, int64_t length) { + TTableFormatFileDesc table_format_params; + TPaimonFileDesc paimon_params; + paimon_params.__set_file_format("parquet"); + TPaimonDeletionFileDesc deletion_file; + deletion_file.__set_path(deletion_file_path); + deletion_file.__set_offset(offset); + deletion_file.__set_length(length); + paimon_params.__set_deletion_file(deletion_file); + table_format_params.__set_paimon_params(paimon_params); + return table_format_params; +} + +TTableFormatFileDesc make_paimon_schema_table_format_desc(int64_t schema_id) { + TTableFormatFileDesc table_format_params; + table_format_params.__set_table_format_type("paimon"); + TPaimonFileDesc paimon_params; + paimon_params.__set_file_format("parquet"); + paimon_params.__set_schema_id(schema_id); + table_format_params.__set_paimon_params(paimon_params); + return table_format_params; +} + +TFileRangeDesc make_paimon_native_range(TFileFormatType::type format_type) { + TFileRangeDesc range; + range.__set_path(format_type == TFileFormatType::FORMAT_ORC ? "s3://bucket/native.orc" + : "s3://bucket/native.parquet"); + range.__set_format_type(format_type); + TTableFormatFileDesc table_format_params; + table_format_params.__set_table_format_type("paimon"); + TPaimonFileDesc paimon_params; + paimon_params.__set_file_format(format_type == TFileFormatType::FORMAT_ORC ? "orc" : "parquet"); + paimon_params.__set_reader_type(TPaimonReaderType::PAIMON_NATIVE); + table_format_params.__set_paimon_params(paimon_params); + range.__set_table_format_params(table_format_params); + return range; +} + +TFileRangeDesc make_paimon_jni_range() { + TFileRangeDesc range; + range.__set_path("/data-placeholder.parquet"); + range.__set_format_type(TFileFormatType::FORMAT_JNI); + TTableFormatFileDesc table_format_params; + table_format_params.__set_table_format_type("paimon"); + TPaimonFileDesc paimon_params; + paimon_params.__set_file_format("parquet"); + paimon_params.__set_reader_type(TPaimonReaderType::PAIMON_JNI); + paimon_params.__set_paimon_split("serialized-paimon-split"); + table_format_params.__set_paimon_params(paimon_params); + range.__set_table_format_params(table_format_params); + return range; +} + +TFileRangeDesc make_paimon_range_without_reader_type(TFileFormatType::type format_type) { + TFileRangeDesc range = make_paimon_native_range(format_type); + range.table_format_params.paimon_params.__isset.reader_type = false; + return range; +} + +// Scenario: PaimonReader shares Hudi's history-schema annotation path. A split whose schema id +// resolves to a historical schema should use field-id mapping and annotate array/map children so +// TableColumnMapper can match evolved physical Parquet columns by id instead of by the old names. +TEST(PaimonReaderTest, AnnotatesArrayAndMapFileSchemaFromSplitHistorySchema) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + scan_params.__set_history_schema_info({ + external_schema( + 100, + {external_array_field("old_tags", 30, + external_schema_field("old_item", 31, {"tag"}), {"tags"}), + external_map_field( + "old_props", 40, external_schema_field("old_key", 41, {"key"}), + external_schema_field("old_value", 42, {"score"}), {"props"})}), + external_schema( + 200, {external_schema_field("tags", 30), external_schema_field("props", 40)}), + }); + + paimon::PaimonReader reader; + reader.TEST_set_scan_params(&scan_params); + + SplitReadOptions split_options; + split_options.current_range.__set_table_format_params( + make_paimon_schema_table_format_desc(100)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID); + + const auto string_type = std::make_shared(); + const auto int_type = std::make_shared(); + + auto tags = make_file_column(0, "old_tags", std::make_shared(string_type)); + tags.children = {make_file_column(0, "old_item", string_type)}; + + auto props = + make_file_column(1, "old_props", std::make_shared(string_type, int_type)); + props.children = {make_file_column(0, "old_key", string_type), + make_file_column(1, "old_value", int_type)}; + + std::vector file_schema {tags, props}; + ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok()); + + ASSERT_EQ(file_schema.size(), 2); + EXPECT_EQ(file_schema[0].get_identifier_field_id(), 30); + EXPECT_EQ(file_schema[0].name_mapping, std::vector({"tags"})); + ASSERT_EQ(file_schema[0].children.size(), 1); + EXPECT_EQ(file_schema[0].children[0].get_identifier_field_id(), 31); + EXPECT_EQ(file_schema[0].children[0].name_mapping, std::vector({"tag"})); + + EXPECT_EQ(file_schema[1].get_identifier_field_id(), 40); + EXPECT_EQ(file_schema[1].name_mapping, std::vector({"props"})); + ASSERT_EQ(file_schema[1].children.size(), 2); + EXPECT_EQ(file_schema[1].children[0].get_identifier_field_id(), 41); + EXPECT_EQ(file_schema[1].children[0].name_mapping, std::vector({"key"})); + EXPECT_EQ(file_schema[1].children[1].get_identifier_field_id(), 42); + EXPECT_EQ(file_schema[1].children[1].name_mapping, std::vector({"score"})); +} + +// Scenario: when FE does not send a matching historical schema for the split schema id, Paimon must +// stay on BY_NAME mapping and must not rewrite the file schema identifiers. +TEST(PaimonReaderTest, FallsBackToByNameWhenSplitHistorySchemaIsMissing) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + scan_params.__set_history_schema_info({ + external_schema(200, {external_schema_field("name", 10)}), + }); + + paimon::PaimonReader reader; + reader.TEST_set_scan_params(&scan_params); + + SplitReadOptions split_options; + split_options.current_range.__set_table_format_params( + make_paimon_schema_table_format_desc(100)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME); + + std::vector file_schema { + make_file_column(0, "old_name", std::make_shared()), + }; + ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok()); + EXPECT_EQ(file_schema[0].get_identifier_field_id(), 0); + EXPECT_TRUE(file_schema[0].name_mapping.empty()); +} + +// Scenario: PaimonReader must clear the previous split schema id before reading a new split. A +// schema-evolved split must not force the following split without schema id to keep BY_FIELD_ID. +TEST(PaimonReaderTest, ResetsSplitSchemaIdBeforePreparingNextSplit) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + scan_params.__set_history_schema_info({ + external_schema(100, {external_schema_field("old_name", 10, {"name"})}), + external_schema(200, {external_schema_field("name", 10)}), + }); + + paimon::PaimonReader reader; + reader.TEST_set_scan_params(&scan_params); + + SplitReadOptions split_with_schema_id; + split_with_schema_id.current_range.__set_table_format_params( + make_paimon_schema_table_format_desc(100)); + ASSERT_TRUE(reader.prepare_split(split_with_schema_id).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID); + + SplitReadOptions split_without_schema_id; + TTableFormatFileDesc table_format_params; + table_format_params.__set_table_format_type("paimon"); + table_format_params.__set_paimon_params(TPaimonFileDesc {}); + split_without_schema_id.current_range.__set_table_format_params(table_format_params); + ASSERT_TRUE(reader.prepare_split(split_without_schema_id).ok()); + EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME); +} + +// Scenario: Paimon reader should parse its bitmap deletion vector and let TableReader apply the +// generated row-position delete predicate before returning table rows. +TEST(PaimonReaderTest, AppliesBitmapDeletionVectorFile) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_paimon_deletion_vector_file_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + const auto dv_path = (test_dir / "delete-vector.bin").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50}, + {"one", "two", "three", "four", "five"}); + const auto dv_length = write_paimon_deletion_vector_file(dv_path, {0, 4}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + ShardedKVCache cache(1); + paimon::PaimonReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.cache = &cache; + split_options.current_range.__set_table_format_params( + make_paimon_table_format_desc(dv_path, 0, dv_length)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + std::vector ids; + bool eos = false; + while (!eos) { + Block block = build_table_block(projected_columns); + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + if (block.rows() == 0) { + continue; + } + const auto& id_column = + assert_cast(expect_not_null_table_column(block, 0)); + for (size_t row = 0; row < block.rows(); ++row) { + ids.push_back(id_column.get_element(row)); + } + } + EXPECT_EQ(ids, std::vector({2, 3, 4})); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(PaimonHybridReaderTest, ClassifiesJniSplitByReaderType) { + EXPECT_FALSE(paimon::PaimonHybridReader::TEST_is_jni_split( + make_paimon_native_range(TFileFormatType::FORMAT_PARQUET))); + EXPECT_FALSE(paimon::PaimonHybridReader::TEST_is_jni_split( + make_paimon_range_without_reader_type(TFileFormatType::FORMAT_JNI))); + EXPECT_TRUE(paimon::PaimonHybridReader::TEST_is_jni_split(make_paimon_jni_range())); +} + +TEST(PaimonHybridReaderTest, ConvertsNativeSplitFileFormat) { + FileFormat file_format; + ASSERT_TRUE(paimon::PaimonHybridReader::TEST_to_file_format( + make_paimon_native_range(TFileFormatType::FORMAT_PARQUET), &file_format) + .ok()); + EXPECT_EQ(file_format, FileFormat::PARQUET); + + ASSERT_TRUE(paimon::PaimonHybridReader::TEST_to_file_format( + make_paimon_native_range(TFileFormatType::FORMAT_ORC), &file_format) + .ok()); + EXPECT_EQ(file_format, FileFormat::ORC); + + auto status = + paimon::PaimonHybridReader::TEST_to_file_format(make_paimon_jni_range(), &file_format); + EXPECT_FALSE(status.ok()); + EXPECT_NE(std::string::npos, status.to_string().find("Unsupported native Paimon file format")); +} + +TEST(PaimonHybridReaderTest, DispatchesNativeThenJniSplitToMatchingReader) { + RuntimeProfile profile("test_profile"); + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto scan_params = make_local_parquet_scan_params(); + io::FileReaderStats file_reader_stats; + io::FileCacheStatistics file_cache_stats; + auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats); + + paimon::PaimonHybridReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = {}, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = &scan_params, + .io_ctx = io_ctx, + .runtime_state = &state, + .scanner_profile = &profile, + }) + .ok()); + + SplitReadOptions native_split; + native_split.current_range = make_paimon_native_range(TFileFormatType::FORMAT_PARQUET); + native_split.current_split_format = FileFormat::PARQUET; + ASSERT_TRUE(reader.prepare_split(native_split).ok()); + + SplitReadOptions jni_split; + jni_split.current_range = make_paimon_jni_range(); + jni_split.current_split_format = FileFormat::JNI; + auto status = reader.prepare_split(jni_split); + EXPECT_FALSE(status.ok()); + EXPECT_NE(std::string::npos, status.to_string().find("missing serialized_table")); + + ASSERT_TRUE(reader.close().ok()); +} + +} // namespace +} // namespace doris::format diff --git a/be/test/format_v2/table/remote_doris_reader_test.cpp b/be/test/format_v2/table/remote_doris_reader_test.cpp new file mode 100644 index 00000000000000..b17f82f505c2c9 --- /dev/null +++ b/be/test/format_v2/table/remote_doris_reader_test.cpp @@ -0,0 +1,470 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table/remote_doris_reader.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/object_pool.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "format_v2/file_reader.h" +#include "gen_cpp/PlanNodes_types.h" +#include "io/file_factory.h" +#include "io/io_common.h" +#include "runtime/runtime_profile.h" +#include "runtime/runtime_state.h" +#include "testutil/desc_tbl_builder.h" + +namespace doris::format::remote_doris { +namespace { + +class BatchRemoteDorisStream final : public RemoteDorisStream { +public: + BatchRemoteDorisStream(std::vector> batches, + std::shared_ptr close_count) + : _batches(std::move(batches)), _close_count(std::move(close_count)) {} + + Status next(std::shared_ptr* batch) override { + DORIS_CHECK(batch != nullptr); + if (_next_batch >= _batches.size()) { + *batch = nullptr; + return Status::OK(); + } + *batch = _batches[_next_batch++]; + return Status::OK(); + } + + Status close() override { + ++(*_close_count); + return Status::OK(); + } + +private: + std::vector> _batches; + std::shared_ptr _close_count; + size_t _next_batch = 0; +}; + +TFileRangeDesc remote_doris_range() { + TRemoteDorisFileDesc remote_desc; + remote_desc.__set_location_uri("grpc://127.0.0.1:9050"); + remote_desc.__set_ticket("ticket-bytes"); + + TTableFormatFileDesc table_desc; + table_desc.__set_table_format_type("remote_doris"); + table_desc.__set_remote_doris_params(std::move(remote_desc)); + + TFileRangeDesc range; + range.__set_format_type(TFileFormatType::FORMAT_ARROW); + range.__set_path("/dummyPath"); + range.__set_table_format_params(std::move(table_desc)); + return range; +} + +std::vector remote_slots(ObjectPool* pool, DescriptorTbl** desc_tbl) { + DescriptorTblBuilder builder(pool); + builder.declare_tuple() << std::make_tuple(std::make_shared(), std::string("id")) + << std::make_tuple(std::make_shared(), + std::string("name")); + *desc_tbl = builder.build(); + return (*desc_tbl)->get_tuple_descriptor(0)->slots(); +} + +TSlotDescriptor remote_complex_slot_descriptor(int id, const DataTypePtr& type, + const std::string& name) { + TSlotDescriptor slot_desc; + slot_desc.__set_id(id); + slot_desc.__set_parent(0); + slot_desc.__set_slotType(type->to_thrift()); + slot_desc.__set_byteOffset(0); + slot_desc.__set_nullIndicatorByte(id / 8); + slot_desc.__set_nullIndicatorBit(id % 8); + slot_desc.__set_slotIdx(id); + slot_desc.__set_columnPos(id); + slot_desc.__set_isMaterialized(true); + slot_desc.__set_is_key(false); + slot_desc.__set_colName(name); + slot_desc.__set_col_unique_id(id); + return slot_desc; +} + +std::vector remote_complex_slots(ObjectPool* pool, DescriptorTbl** desc_tbl) { + const auto string_type = make_nullable(std::make_shared()); + const auto int_type = make_nullable(std::make_shared()); + const auto array_type = make_nullable(std::make_shared(string_type)); + const auto map_type = make_nullable(std::make_shared(string_type, int_type)); + const auto struct_type = make_nullable(std::make_shared( + DataTypes {int_type, make_nullable(std::make_shared()), string_type}, + Strings {"f1", "f2", "f3"})); + + TDescriptorTable thrift_desc_tbl; + TTupleDescriptor tuple_desc; + tuple_desc.__set_id(0); + tuple_desc.__set_byteSize(0); + tuple_desc.__set_numNullBytes(1); + thrift_desc_tbl.tupleDescriptors.push_back(std::move(tuple_desc)); + thrift_desc_tbl.slotDescriptors.push_back( + remote_complex_slot_descriptor(0, array_type, "c_array_s")); + thrift_desc_tbl.slotDescriptors.push_back(remote_complex_slot_descriptor(1, map_type, "c_map")); + thrift_desc_tbl.slotDescriptors.push_back( + remote_complex_slot_descriptor(2, struct_type, "c_struct")); + auto status = DescriptorTbl::create(pool, thrift_desc_tbl, desc_tbl); + EXPECT_TRUE(status.ok()) << status; + return (*desc_tbl)->get_tuple_descriptor(0)->slots(); +} + +std::shared_ptr make_batch(const std::vector& names) { + arrow::Int32Builder id_builder; + EXPECT_TRUE(id_builder.Append(10).ok()); + EXPECT_TRUE(id_builder.Append(20).ok()); + std::shared_ptr id_array; + EXPECT_TRUE(id_builder.Finish(&id_array).ok()); + + arrow::StringBuilder name_builder; + EXPECT_TRUE(name_builder.Append("alice").ok()); + EXPECT_TRUE(name_builder.Append("bob").ok()); + std::shared_ptr name_array; + EXPECT_TRUE(name_builder.Finish(&name_array).ok()); + + std::vector> fields; + std::vector> arrays; + for (const auto& name : names) { + if (name == "id") { + fields.push_back(arrow::field("id", arrow::int32())); + arrays.push_back(id_array); + } else if (name == "name") { + fields.push_back(arrow::field("name", arrow::utf8())); + arrays.push_back(name_array); + } else { + fields.push_back(arrow::field(name, arrow::int32())); + arrays.push_back(id_array); + } + } + return arrow::RecordBatch::Make(arrow::schema(std::move(fields)), 2, std::move(arrays)); +} + +std::unique_ptr create_reader( + RuntimeProfile* profile, const TFileRangeDesc& range, + const std::vector& slots, + std::vector> batches, std::shared_ptr close_count, + std::shared_ptr io_ctx = nullptr) { + auto system_properties = std::make_shared(); + auto file_description = std::make_unique(); + file_description->path = "/dummyPath"; + auto factory = [batches = std::move(batches), close_count]( + const TFileRangeDesc&, + std::unique_ptr* stream) mutable { + *stream = std::make_unique(std::move(batches), close_count); + return Status::OK(); + }; + return std::make_unique(system_properties, file_description, + std::move(io_ctx), profile, range, slots, + std::move(factory)); +} + +Block make_request_block(const std::vector& schema, + const std::vector& local_ids) { + Block block; + for (const auto local_id : local_ids) { + const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) { + return column.local_id == local_id; + }); + DORIS_CHECK(it != schema.end()); + block.insert({it->type->create_column(), it->type, it->name}); + } + return block; +} + +int32_t nullable_int_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data()[row]; +} + +std::string nullable_string_at(const IColumn& column, size_t row) { + const auto& nullable = assert_cast(column); + const auto& nested = assert_cast(nullable.get_nested_column()); + return nested.get_data_at(row).to_string(); +} + +class NullableIntGreaterThanExpr final : public VExpr { +public: + NullableIntGreaterThanExpr(size_t block_position, int32_t value) + : VExpr(std::make_shared(), false), + _block_position(block_position), + _value(value) {} + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector, + size_t count, ColumnPtr& result_column) const override { + DORIS_CHECK(block != nullptr); + const auto& nullable = + assert_cast(*block->get_by_position(_block_position).column); + const auto& data = assert_cast(nullable.get_nested_column()); + + auto result = ColumnUInt8::create(); + auto& result_data = result->get_data(); + result_data.resize(count); + for (size_t row = 0; row < count; ++row) { + const auto source_row = selector == nullptr ? row : (*selector)[row]; + result_data[row] = + !nullable.is_null_at(source_row) && data.get_element(source_row) > _value; + } + result_column = std::move(result); + return Status::OK(); + } + + Status clone_node(VExprSPtr* cloned_expr) const override { + DORIS_CHECK(cloned_expr != nullptr); + *cloned_expr = std::make_shared(_block_position, _value); + return Status::OK(); + } + +private: + size_t _block_position; + int32_t _value; + const std::string _name = "NullableIntGreaterThanExpr"; +}; + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto context = VExprContext::create_shared(expr); + auto status = context->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = context->open(state); + EXPECT_TRUE(status.ok()) << status; + return context; +} + +} // namespace + +TEST(RemoteDorisV2ReaderTest, BuildsSchemaFromSlotsAndProjectsRequestedColumns) { + ObjectPool pool; + DescriptorTbl* desc_tbl = nullptr; + const auto slots = remote_slots(&pool, &desc_tbl); + RuntimeState state; + RuntimeProfile profile("remote_doris_v2_reader_test"); + auto close_count = std::make_shared(0); + auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"id", "name"})}, + close_count); + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 2); + EXPECT_EQ(schema[0].name, "id"); + EXPECT_EQ(schema[0].local_id, 0); + EXPECT_EQ(schema[1].name, "name"); + EXPECT_EQ(schema[1].local_id, 1); + + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_request_block(schema, {1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_FALSE(eof); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice"); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "bob"); + + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + EXPECT_EQ(rows, 0); + EXPECT_TRUE(eof); + ASSERT_TRUE(reader->close().ok()); + EXPECT_EQ(*close_count, 1); +} + +TEST(RemoteDorisV2ReaderTest, BuildsComplexSchemaChildrenFromSlots) { + ObjectPool pool; + DescriptorTbl* desc_tbl = nullptr; + const auto slots = remote_complex_slots(&pool, &desc_tbl); + RuntimeState state; + RuntimeProfile profile("remote_doris_v2_reader_complex_schema_test"); + auto close_count = std::make_shared(0); + auto reader = create_reader(&profile, remote_doris_range(), slots, {}, close_count); + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + ASSERT_EQ(schema.size(), 3); + + ASSERT_EQ(schema[0].name, "c_array_s"); + ASSERT_EQ(schema[0].children.size(), 1); + EXPECT_EQ(schema[0].children[0].name, "element"); + EXPECT_EQ(schema[0].children[0].local_id, 0); + EXPECT_TRUE(schema[0].children[0].children.empty()); + + ASSERT_EQ(schema[1].name, "c_map"); + ASSERT_EQ(schema[1].children.size(), 2); + EXPECT_EQ(schema[1].children[0].name, "key"); + EXPECT_EQ(schema[1].children[0].local_id, 0); + EXPECT_EQ(schema[1].children[1].name, "value"); + EXPECT_EQ(schema[1].children[1].local_id, 1); + + ASSERT_EQ(schema[2].name, "c_struct"); + ASSERT_EQ(schema[2].children.size(), 3); + EXPECT_EQ(schema[2].children[0].name, "f1"); + EXPECT_EQ(schema[2].children[0].local_id, 0); + EXPECT_EQ(schema[2].children[1].name, "f2"); + EXPECT_EQ(schema[2].children[1].local_id, 1); + EXPECT_EQ(schema[2].children[2].name, "f3"); + EXPECT_EQ(schema[2].children[2].local_id, 2); +} + +TEST(RemoteDorisV2ReaderTest, HandlesDifferentArrowColumnOrder) { + ObjectPool pool; + DescriptorTbl* desc_tbl = nullptr; + const auto slots = remote_slots(&pool, &desc_tbl); + RuntimeState state; + RuntimeProfile profile("remote_doris_v2_reader_reordered_test"); + auto close_count = std::make_shared(0); + auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"name", "id"})}, + close_count); + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok()); + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_request_block(schema, {1, 0}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 2); + EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice"); + EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 1), 20); +} + +TEST(RemoteDorisV2ReaderTest, AppliesConjunctsAndTracksPredicateFilteredRows) { + ObjectPool pool; + DescriptorTbl* desc_tbl = nullptr; + const auto slots = remote_slots(&pool, &desc_tbl); + RuntimeState state; + RuntimeProfile profile("remote_doris_v2_reader_filter_test"); + auto close_count = std::make_shared(0); + auto io_ctx = std::make_shared(); + auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"id", "name"})}, + close_count, io_ctx); + ASSERT_TRUE(reader->init(&state).ok()); + + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_predicate_column(LocalColumnId(0)).ok()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + request->conjuncts = { + prepared_conjunct(&state, std::make_shared(0, 10))}; + ASSERT_TRUE(reader->open(request).ok()); + + auto block = make_request_block(schema, {0, 1}); + size_t rows = 0; + bool eof = false; + ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); + ASSERT_EQ(rows, 1); + EXPECT_FALSE(eof); + EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 20); + EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "bob"); + EXPECT_EQ(io_ctx->predicate_filtered_rows, 1); +} + +TEST(RemoteDorisV2ReaderTest, RejectsUnknownReturnedColumnAndMissingRequestedColumn) { + ObjectPool pool; + DescriptorTbl* desc_tbl = nullptr; + const auto slots = remote_slots(&pool, &desc_tbl); + RuntimeState state; + RuntimeProfile profile("remote_doris_v2_reader_error_test"); + + { + auto close_count = std::make_shared(0); + auto reader = create_reader(&profile, remote_doris_range(), slots, + {make_batch({"unknown"})}, close_count); + ASSERT_TRUE(reader->init(&state).ok()); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok()); + ASSERT_TRUE(reader->open(request).ok()); + auto block = make_request_block(schema, {0}); + size_t rows = 0; + bool eof = false; + EXPECT_FALSE(reader->get_block(&block, &rows, &eof).ok()); + } + + { + auto close_count = std::make_shared(0); + auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"id"})}, + close_count); + ASSERT_TRUE(reader->init(&state).ok()); + std::vector schema; + ASSERT_TRUE(reader->get_schema(&schema).ok()); + auto request = std::make_shared(); + FileScanRequestBuilder builder(request.get()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + ASSERT_TRUE(reader->open(request).ok()); + auto block = make_request_block(schema, {1}); + size_t rows = 0; + bool eof = false; + EXPECT_FALSE(reader->get_block(&block, &rows, &eof).ok()); + } +} + +TEST(RemoteDorisV2ReaderTest, RejectsInvalidRemoteDorisRange) { + ObjectPool pool; + DescriptorTbl* desc_tbl = nullptr; + const auto slots = remote_slots(&pool, &desc_tbl); + RuntimeState state; + RuntimeProfile profile("remote_doris_v2_reader_bad_range_test"); + auto range = remote_doris_range(); + range.table_format_params.__isset.remote_doris_params = false; + auto close_count = std::make_shared(0); + auto reader = create_reader(&profile, range, slots, {}, close_count); + EXPECT_FALSE(reader->init(&state).ok()); +} + +} // namespace doris::format::remote_doris diff --git a/be/test/format_v2/table_reader_request_test.cpp b/be/test/format_v2/table_reader_request_test.cpp new file mode 100644 index 00000000000000..3845e086cea1b1 --- /dev/null +++ b/be/test/format_v2/table_reader_request_test.cpp @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "format_v2/table_reader.h" + +namespace doris::format { +namespace { + +class TableReaderRequestTestHelper final : public TableReader { +public: + using TableReader::_append_file_scan_column; +}; + +// Scenario: FileScanRequestBuilder owns request-local block positions and merges repeated nested +// projections for the same root. ColumnMapper can focus on producing file-local projection trees. +TEST(FileScanRequestBuilderTest, MergesNestedProjectionAndKeepsStableBlockPosition) { + FileScanRequest request; + FileScanRequestBuilder builder(&request); + + auto name_projection = LocalColumnIndex::partial_local(5); + name_projection.children.push_back(LocalColumnIndex::local(2)); + ASSERT_TRUE(builder.add_non_predicate_column(std::move(name_projection)).ok()); + + auto id_projection = LocalColumnIndex::partial_local(5); + id_projection.children.push_back(LocalColumnIndex::local(0)); + ASSERT_TRUE(builder.add_non_predicate_column(std::move(id_projection)).ok()); + + ASSERT_EQ(request.local_positions.size(), 1); + EXPECT_EQ(request.local_positions.at(LocalColumnId(5)).value(), 0); + ASSERT_EQ(request.non_predicate_columns.size(), 1); + const auto& projection = request.non_predicate_columns[0]; + EXPECT_EQ(projection.column_id(), LocalColumnId(5)); + ASSERT_FALSE(projection.project_all_children); + ASSERT_EQ(projection.children.size(), 2); + EXPECT_EQ(projection.children[0].local_id(), 0); + EXPECT_EQ(projection.children[1].local_id(), 2); +} + +// Scenario: predicate scan columns dominate non-predicate columns because file readers return +// predicate columns in the same file-local block and TableReader can reuse them for output. +TEST(FileScanRequestBuilderTest, PredicateColumnRemovesDuplicateNonPredicateColumn) { + FileScanRequest request; + FileScanRequestBuilder builder(&request); + + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok()); + ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(2)).ok()); + ASSERT_TRUE(builder.add_predicate_column(LocalColumnId(1)).ok()); + + ASSERT_EQ(request.local_positions.size(), 2); + EXPECT_EQ(request.local_positions.at(LocalColumnId(1)).value(), 0); + EXPECT_EQ(request.local_positions.at(LocalColumnId(2)).value(), 1); + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1)); + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(2)); +} + +// Scenario: TableReader's format-specific customization path delegates to FileScanRequestBuilder +// and preserves the same predicate/non-predicate de-duplication rule. +TEST(TableReaderRequestTest, AppendPredicateColumnKeepsOtherNonPredicateColumns) { + TableReaderRequestTestHelper reader; + FileScanRequest request; + + reader._append_file_scan_column(&request, LocalColumnId(1), &request.non_predicate_columns); + reader._append_file_scan_column(&request, LocalColumnId(2), &request.non_predicate_columns); + reader._append_file_scan_column(&request, LocalColumnId(1), &request.predicate_columns); + + ASSERT_EQ(request.local_positions.size(), 2); + EXPECT_EQ(request.local_positions.at(LocalColumnId(1)).value(), 0); + EXPECT_EQ(request.local_positions.at(LocalColumnId(2)).value(), 1); + + ASSERT_EQ(request.predicate_columns.size(), 1); + EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1)); + + ASSERT_EQ(request.non_predicate_columns.size(), 1); + EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(2)); +} + +} // namespace +} // namespace doris::format diff --git a/be/test/format_v2/table_reader_test.cpp b/be/test/format_v2/table_reader_test.cpp new file mode 100644 index 00000000000000..a8659667688d4c --- /dev/null +++ b/be/test/format_v2/table_reader_test.cpp @@ -0,0 +1,3826 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "format_v2/table_reader.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/consts.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_array.h" +#include "core/column/column_const.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_array.h" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "core/data_type/data_type_struct.h" +#include "exprs/runtime_filter_expr.h" +#include "exprs/vectorized_fn_call.h" +#include "exprs/vexpr.h" +#include "exprs/vliteral.h" +#include "exprs/vslot_ref.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/ExternalTableSchema_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "io/io_common.h" +#include "runtime/runtime_profile.h" +#include "runtime/runtime_state.h" +#include "storage/predicate/predicate_creator.h" +#include "storage/segment/condition_cache.h" + +namespace doris::format { +namespace { + +std::vector projection_ids(const std::vector& projections) { + std::vector ids; + ids.reserve(projections.size()); + for (const auto& projection : projections) { + ids.push_back(projection.index); + } + return ids; +} + +TEST(LocalColumnIndexTest, MergeUnionsPartialChildrenAndFullProjectionDominates) { + LocalColumnIndex target {.index = 10, .project_all_children = false}; + target.children.push_back({.index = 1}); + target.children.push_back({.index = 2, .project_all_children = false}); + target.children.back().children.push_back({.index = 20}); + + LocalColumnIndex source {.index = 10, .project_all_children = false}; + source.children.push_back({.index = 2, .project_all_children = false}); + source.children.back().children.push_back({.index = 21}); + source.children.push_back({.index = 3}); + + ASSERT_TRUE(merge_local_column_index(&target, source).ok()); + ASSERT_FALSE(target.project_all_children); + ASSERT_EQ(std::vector({1, 2, 3}), projection_ids(target.children)); + ASSERT_FALSE(target.children[1].project_all_children); + ASSERT_EQ(std::vector({20, 21}), projection_ids(target.children[1].children)); + ASSERT_TRUE(target.children[2].project_all_children); + + LocalColumnIndex full_source {.index = 10}; + ASSERT_TRUE(merge_local_column_index(&target, full_source).ok()); + ASSERT_TRUE(target.project_all_children); + ASSERT_TRUE(target.children.empty()); +} + +TEST(LocalColumnIndexTest, FindsProjectedChildren) { + LocalColumnIndex projection {.index = 10, .project_all_children = false}; + projection.children.push_back({.index = 1}); + projection.children.push_back({.index = 2}); + + EXPECT_TRUE(is_full_projection(nullptr)); + EXPECT_FALSE(is_full_projection(&projection)); + EXPECT_TRUE(is_partial_projection(&projection)); + ASSERT_NE(find_child_projection(&projection, 2), nullptr); + EXPECT_EQ(find_child_projection(&projection, 2)->local_id(), 2); + EXPECT_EQ(find_child_projection(&projection, 3), nullptr); + EXPECT_TRUE(is_child_projected(nullptr, 3)); + EXPECT_TRUE(is_child_projected(&projection, 1)); + EXPECT_FALSE(is_child_projected(&projection, 3)); +} + +TEST(LocalColumnIndexTest, ProjectColumnDefinitionMatchesChildrenByLocalId) { + auto int_type = std::make_shared(); + auto string_type = std::make_shared(); + ColumnDefinition field; + field.identifier = Field::create_field(5); + field.name = "root"; + field.type = + std::make_shared(DataTypes {int_type, string_type}, Strings {"a", "b"}); + ColumnDefinition a_child; + a_child.identifier = Field::create_field(10); + a_child.local_id = 0; + a_child.name = "a"; + a_child.type = int_type; + ColumnDefinition b_child; + b_child.identifier = Field::create_field(20); + b_child.local_id = 1; + b_child.name = "b"; + b_child.type = string_type; + field.children = { + a_child, + b_child, + }; + LocalColumnIndex projection {.index = 5, .project_all_children = false}; + projection.children.push_back({.index = 1}); + + ColumnDefinition projected_field; + ASSERT_TRUE(project_column_definition(field, projection, &projected_field).ok()); + ASSERT_EQ(projected_field.children.size(), 1); + EXPECT_EQ(projected_field.children[0].get_identifier_field_id(), 20); + EXPECT_EQ(projected_field.children[0].name, "b"); + + const auto* projected_type = + assert_cast(remove_nullable(projected_field.type).get()); + ASSERT_EQ(projected_type->get_elements().size(), 1); + EXPECT_EQ(projected_type->get_element_name(0), "b"); + EXPECT_TRUE(projected_type->get_element(0)->equals(*string_type)); +} + +TEST(LocalColumnIndexTest, ProjectColumnDefinitionKeepsFileChildOrder) { + auto int_type = std::make_shared(); + auto string_type = std::make_shared(); + ColumnDefinition a_child; + a_child.identifier = Field::create_field(10); + a_child.local_id = 0; + a_child.name = "a"; + a_child.type = int_type; + ColumnDefinition b_child; + b_child.identifier = Field::create_field(20); + b_child.local_id = 1; + b_child.name = "b"; + b_child.type = string_type; + + ColumnDefinition field; + field.identifier = Field::create_field(5); + field.name = "root"; + field.type = + std::make_shared(DataTypes {int_type, string_type}, Strings {"a", "b"}); + field.children = {a_child, b_child}; + + LocalColumnIndex projection {.index = 5, .project_all_children = false}; + projection.children.push_back({.index = 1}); + projection.children.push_back({.index = 0}); + + ColumnDefinition projected_field; + ASSERT_TRUE(project_column_definition(field, projection, &projected_field).ok()); + ASSERT_EQ(projected_field.children.size(), 2); + EXPECT_EQ(projected_field.children[0].name, "a"); + EXPECT_EQ(projected_field.children[1].name, "b"); + + const auto* projected_type = + assert_cast(remove_nullable(projected_field.type).get()); + ASSERT_EQ(projected_type->get_elements().size(), 2); + EXPECT_EQ(projected_type->get_element_name(0), "a"); + EXPECT_EQ(projected_type->get_element_name(1), "b"); +} + +VExprSPtr table_int32_slot_ref(int slot_id, int column_id, const std::string& column_name) { + const auto nullable_int_type = make_nullable(std::make_shared()); + return VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_int_type, column_name); +} + +VExprSPtr table_int32_literal(int32_t value) { + return VLiteral::create_shared(std::make_shared(), + Field::create_field(value)); +} + +TExprNode table_function_node(const std::string& function_name, const DataTypePtr& return_type, + const std::vector& arg_types, + TExprNodeType::type node_type, + TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE, + bool short_circuit_evaluation = false) { + TFunctionName fn_name; + fn_name.__set_function_name(function_name); + TFunction fn; + fn.__set_name(fn_name); + fn.__set_binary_type(TFunctionBinaryType::BUILTIN); + std::vector thrift_arg_types; + thrift_arg_types.reserve(arg_types.size()); + for (const auto& arg_type : arg_types) { + thrift_arg_types.push_back(arg_type->to_thrift()); + } + fn.__set_arg_types(thrift_arg_types); + fn.__set_ret_type(return_type->to_thrift()); + fn.__set_has_var_args(false); + + TExprNode node; + node.__set_node_type(node_type); + node.__set_opcode(opcode); + node.__set_type(return_type->to_thrift()); + node.__set_fn(fn); + node.__set_num_children(static_cast(arg_types.size())); + node.__set_is_nullable(return_type->is_nullable()); + if (short_circuit_evaluation) { + node.__set_short_circuit_evaluation(true); + } + return node; +} + +VExprSPtr create_expr_from_node(const TExprNode& node) { + VExprSPtr expr; + auto status = VExpr::create_expr(node, expr); + DORIS_CHECK(status.ok()) << status.to_string(); + return expr; +} + +VExprSPtr table_function_expr(const std::string& function_name, const DataTypePtr& return_type, + const std::vector& arg_types, + TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL, + TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) { + const auto node = table_function_node(function_name, return_type, arg_types, node_type, opcode); + return VectorizedFnCall::create_shared(node); +} + +VExprSPtr table_int32_greater_than_expr(int slot_id, int column_id, int32_t value) { + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + auto expr = table_function_expr("gt", make_nullable(std::make_shared()), + {nullable_int_type, int_type}, TExprNodeType::BINARY_PRED, + TExprOpcode::GT); + expr->add_child(table_int32_slot_ref(slot_id, column_id, "id")); + expr->add_child(table_int32_literal(value)); + return expr; +} + +VExprSPtr runtime_filter_wrapper_expr(VExprSPtr impl) { + TExprNode node; + node.__set_node_type(TExprNodeType::SLOT_REF); + node.__set_type(std::make_shared()->to_thrift()); + node.__set_num_children(1); + return RuntimeFilterExpr::create_shared(node, std::move(impl), 0, false, /*filter_id=*/1); +} + +class NullableArrayBigintDefaultExpr final : public VExpr { +public: + explicit NullableArrayBigintDefaultExpr(DataTypePtr data_type) + : _name("single_element_groups") { + _data_type = std::move(data_type); + } + + const std::string& expr_name() const override { return _name; } + + bool is_constant() const override { return false; } + + Status execute_column_impl(VExprContext*, const Block*, const Selector* selector, size_t count, + ColumnPtr& result_column) const override { + DCHECK(selector == nullptr || selector->size() == count); + auto values = ColumnInt64::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + auto null_map = ColumnUInt8::create(); + for (size_t i = 0; i < count; ++i) { + values->insert_value(7); + offsets->insert_value(static_cast(i + 1)); + null_map->insert_value(0); + } + auto array_column = ColumnArray::create(std::move(values), std::move(offsets)); + result_column = ColumnNullable::create(std::move(array_column), std::move(null_map)); + return Status::OK(); + } + +private: + std::string _name; +}; + +class TableReaderMaterializeTestHelper final : public TableReader { +public: + using TableReader::_materialize_map_mapping_column; +}; + +VExprSPtr table_int32_sum_expr(int left_slot_id, int left_column_id, int right_slot_id, + int right_column_id) { + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + auto expr = + table_function_expr("add", nullable_int_type, {nullable_int_type, nullable_int_type}); + expr->add_child(table_int32_slot_ref(left_slot_id, left_column_id, "id")); + expr->add_child(table_int32_slot_ref(right_slot_id, right_column_id, "score")); + return expr; +} + +VExprSPtr table_int32_sum_greater_than_expr(int left_slot_id, int left_column_id, int right_slot_id, + int right_column_id, int32_t value) { + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + auto expr = table_function_expr("gt", make_nullable(std::make_shared()), + {nullable_int_type, int_type}, TExprNodeType::BINARY_PRED, + TExprOpcode::GT); + expr->add_child( + table_int32_sum_expr(left_slot_id, left_column_id, right_slot_id, right_column_id)); + expr->add_child(table_int32_literal(value)); + return expr; +} + +VExprSPtr table_condition_function_expr(const std::string& function_name, bool short_circuit) { + const auto int_type = std::make_shared(); + std::vector arg_types; + if (function_name == "if") { + arg_types = {std::make_shared(), int_type, int_type}; + } else { + arg_types = {int_type, int_type}; + } + auto expr = create_expr_from_node( + table_function_node(function_name, int_type, arg_types, TExprNodeType::FUNCTION_CALL, + TExprOpcode::INVALID_OPCODE, short_circuit)); + if (function_name == "if") { + expr->add_child(table_int32_greater_than_expr(0, 0, 0)); + expr->add_child(table_int32_literal(1)); + expr->add_child(table_int32_literal(0)); + } else { + expr->add_child(table_int32_slot_ref(0, 0, "id")); + expr->add_child(table_int32_literal(0)); + } + return expr; +} + +VExprSPtr table_case_expr(bool short_circuit) { + const auto int_type = std::make_shared(); + TCaseExpr case_node; + case_node.__set_has_case_expr(false); + case_node.__set_has_else_expr(true); + + TExprNode node; + node.__set_node_type(TExprNodeType::CASE_EXPR); + node.__set_type(int_type->to_thrift()); + node.__set_is_nullable(false); + node.__set_num_children(3); + node.__set_case_expr(case_node); + if (short_circuit) { + node.__set_short_circuit_evaluation(true); + } + + auto expr = create_expr_from_node(node); + expr->add_child(table_int32_greater_than_expr(0, 0, 0)); + expr->add_child(table_int32_literal(1)); + expr->add_child(table_int32_literal(0)); + return expr; +} + +TEST(CloneTableExprTreeTest, ClonesConditionalExpressions) { + const std::vector expressions { + table_condition_function_expr("if", false), + table_condition_function_expr("if", true), + table_condition_function_expr("ifnull", false), + table_condition_function_expr("ifnull", true), + table_condition_function_expr("coalesce", false), + table_condition_function_expr("coalesce", true), + table_case_expr(false), + table_case_expr(true), + }; + + for (const auto& expr : expressions) { + VExprSPtr cloned; + const auto status = clone_table_expr_tree(expr, &cloned); + ASSERT_TRUE(status.ok()) << expr->debug_string() << ": " << status.to_string(); + ASSERT_NE(cloned, nullptr); + const auto* original_expr = expr.get(); + const auto* cloned_expr = cloned.get(); + EXPECT_TRUE(typeid(*original_expr) == typeid(*cloned_expr)) + << expr->expr_name() << " cloned as " << typeid(*cloned_expr).name(); + EXPECT_EQ(expr->expr_name(), cloned->expr_name()); + EXPECT_EQ(expr->get_num_children(), cloned->get_num_children()); + EXPECT_NE(original_expr, cloned_expr); + } +} + +// Scenario: cloning a VectorizedFnCall whose return type is complex must not reconstruct the expr +// from TExprNode, because DataTypeFactory rejects nested types through the primitive-type path. +TEST(CloneTableExprTreeTest, ClonesVectorizedFnCallWithComplexReturnType) { + const auto int_type = std::make_shared(); + const auto string_type = std::make_shared(); + const auto struct_type = + std::make_shared(DataTypes {int_type, string_type}, Strings {"a", "b"}); + const auto array_type = std::make_shared(struct_type); + + auto expr = table_function_expr("element_at", struct_type, {array_type, int_type}); + expr->add_child(VSlotRef::create_shared(0, 0, -1, array_type, "array_of_struct")); + expr->add_child(table_int32_literal(1)); + + VExprSPtr cloned; + const auto status = clone_table_expr_tree(expr, &cloned); + ASSERT_TRUE(status.ok()) << status.to_string(); + ASSERT_NE(cloned, nullptr); + EXPECT_EQ(cloned->expr_name(), expr->expr_name()); + EXPECT_TRUE(cloned->data_type()->equals(*struct_type)); + EXPECT_EQ(cloned->get_num_children(), 2); + EXPECT_NE(cloned.get(), expr.get()); +} + +std::shared_ptr finish_array(arrow::ArrayBuilder* builder) { + std::shared_ptr array; + EXPECT_TRUE(builder->Finish(&array).ok()); + return array; +} + +std::shared_ptr build_int32_array(const std::vector& values) { + arrow::Int32Builder builder; + for (const auto value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +std::shared_ptr build_string_array(const std::vector& values) { + arrow::StringBuilder builder; + for (const auto& value : values) { + EXPECT_TRUE(builder.Append(value).ok()); + } + return finish_array(&builder); +} + +void write_parquet_file(const std::string& file_path, int32_t id, const std::string& value) { + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false), + arrow::field("value", arrow::utf8(), false), + }); + auto table = arrow::Table::Make(schema, {build_int32_array({id}), build_string_array({value})}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1, + builder.build())); +} + +void write_struct_parquet_file(const std::string& file_path, int32_t id) { + auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false)}); + arrow::StructBuilder builder( + struct_type, arrow::default_memory_pool(), + {std::make_shared(arrow::default_memory_pool())}); + auto* id_builder = assert_cast(builder.field_builder(0)); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(id_builder->Append(id).ok()); + + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make(schema, {finish_array(&builder)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1, + writer_builder.build())); +} + +void write_struct_parquet_file(const std::string& file_path, const std::vector& ids, + int64_t row_group_size = -1) { + auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false)}); + arrow::StructBuilder builder( + struct_type, arrow::default_memory_pool(), + {std::make_shared(arrow::default_memory_pool())}); + auto* id_builder = assert_cast(builder.field_builder(0)); + for (const auto id : ids) { + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(id_builder->Append(id).ok()); + } + + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make(schema, {finish_array(&builder)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + const auto write_row_group_size = + row_group_size > 0 ? row_group_size : static_cast(ids.size()); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + write_row_group_size, + writer_builder.build())); +} + +void write_struct_with_nullable_child_parquet_file(const std::string& file_path) { + auto struct_type = arrow::struct_({ + arrow::field("id", arrow::int32(), false), + arrow::field("note", arrow::utf8(), true), + }); + std::vector> field_builders; + auto id_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(id_builder))); + auto note_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(note_builder))); + arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(), + std::move(field_builders)); + auto* struct_id_builder = assert_cast(builder.field_builder(0)); + auto* struct_note_builder = assert_cast(builder.field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_id_builder->Append(7).ok()); + EXPECT_TRUE(struct_note_builder->Append("seven").ok()); + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_id_builder->Append(8).ok()); + EXPECT_TRUE(struct_note_builder->AppendNull().ok()); + + auto schema = arrow::schema({ + arrow::field("s", struct_type, false), + }); + auto table = arrow::Table::Make(schema, {finish_array(&builder)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2, + writer_builder.build())); +} + +void write_list_struct_parquet_file(const std::string& file_path) { + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::int32(), false)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto b_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(b_array_builder))); + auto struct_builder = std::make_shared( + struct_type, arrow::default_memory_pool(), std::move(field_builders)); + auto list_type = arrow::list(arrow::field("element", struct_type, true)); + arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder, list_type); + auto* a_builder = assert_cast(struct_builder->field_builder(0)); + auto* b_builder = assert_cast(struct_builder->field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(10).ok()); + EXPECT_TRUE(b_builder->Append(11).ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(20).ok()); + EXPECT_TRUE(b_builder->Append(21).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(30).ok()); + EXPECT_TRUE(b_builder->Append(31).ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(struct_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(40).ok()); + EXPECT_TRUE(b_builder->Append(41).ok()); + + auto schema = arrow::schema({ + arrow::field("xs", list_type, false), + }); + auto table = arrow::Table::Make(schema, {finish_array(&builder)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 3, + writer_builder.build())); +} + +void write_map_struct_parquet_file(const std::string& file_path) { + auto key_builder = std::make_shared(); + auto struct_type = arrow::struct_( + {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), false)}); + std::vector> field_builders; + auto a_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(a_array_builder))); + auto b_array_builder = std::make_unique(); + field_builders.push_back(std::shared_ptr(std::move(b_array_builder))); + auto value_builder = std::make_shared( + struct_type, arrow::default_memory_pool(), std::move(field_builders)); + auto map_type = arrow::map(arrow::int32(), arrow::field("value", struct_type, false)); + arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder, map_type); + auto* a_builder = assert_cast(value_builder->field_builder(0)); + auto* b_builder = assert_cast(value_builder->field_builder(1)); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(1).ok()); + EXPECT_TRUE(value_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(10).ok()); + EXPECT_TRUE(b_builder->Append("ma").ok()); + EXPECT_TRUE(key_builder->Append(2).ok()); + EXPECT_TRUE(value_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(20).ok()); + EXPECT_TRUE(b_builder->Append("mb").ok()); + + EXPECT_TRUE(builder.Append().ok()); + EXPECT_TRUE(key_builder->Append(3).ok()); + EXPECT_TRUE(value_builder->Append().ok()); + EXPECT_TRUE(a_builder->Append(30).ok()); + EXPECT_TRUE(b_builder->Append("mc").ok()); + + EXPECT_TRUE(builder.AppendEmptyValue().ok()); + + auto schema = arrow::schema({ + arrow::field("kv", map_type, false), + }); + auto table = arrow::Table::Make(schema, {finish_array(&builder)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder writer_builder; + writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6); + writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + writer_builder.compression(::parquet::Compression::UNCOMPRESSED); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 3, + writer_builder.build())); +} + +void write_int_pair_parquet_file(const std::string& file_path, const std::vector& ids, + const std::vector& scores, + const std::vector& values, + int64_t row_group_size = -1) { + const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"}); + const auto score_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"1"}); + const auto value_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2"}); + auto schema = arrow::schema({ + arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata), + arrow::field("score", arrow::int32(), false)->WithMetadata(score_metadata), + arrow::field("value", arrow::utf8(), false)->WithMetadata(value_metadata), + }); + auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores), + build_string_array(values)}); + + auto file_result = arrow::io::FileOutputStream::Open(file_path); + ASSERT_TRUE(file_result.ok()) << file_result.status(); + std::shared_ptr out = *file_result; + + ::parquet::WriterProperties::Builder builder; + builder.version(::parquet::ParquetVersion::PARQUET_2_6); + builder.data_page_version(::parquet::ParquetDataPageVersion::V2); + builder.compression(::parquet::Compression::UNCOMPRESSED); + const auto write_row_group_size = + row_group_size > 0 ? row_group_size : static_cast(ids.size()); + PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, + write_row_group_size, builder.build())); +} + +Block build_table_block(const std::vector& columns) { + Block block; + for (const auto& column : columns) { + block.insert({column.type->create_column(), column.type, column.name}); + } + return block; +} + +const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) { + if (!column.is_nullable()) { + return column; + } + const auto& nullable_column = assert_cast(column); + for (const auto is_null : nullable_column.get_null_map_data()) { + EXPECT_EQ(is_null, 0); + } + return nullable_column.get_nested_column(); +} + +void expect_nullable_column_all_null(const IColumn& column) { + const auto full_column = column.convert_to_full_column_if_const(); + const auto& nullable_column = assert_cast(*full_column); + for (const auto is_null : nullable_column.get_null_map_data()) { + EXPECT_EQ(is_null, 1); + } +} + +const IColumn& expect_not_null_table_column(const Block& block, size_t position) { + return expect_not_null_nullable_nested_column(*block.get_by_position(position).column); +} + +ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type); + +void expect_int32_column_values(const IColumn& column, + const std::vector& expected_values) { + const auto full_column = column.convert_to_full_column_if_const(); + const auto& nested_column = expect_not_null_nullable_nested_column(*full_column); + const auto& values = assert_cast(nested_column).get_data(); + ASSERT_EQ(values.size(), expected_values.size()); + for (size_t row = 0; row < expected_values.size(); ++row) { + EXPECT_EQ(values[row], expected_values[row]); + } +} + +SplitReadOptions build_split_options(const std::string& file_path) { + SplitReadOptions options; + options.current_range.__set_path(file_path); + options.current_range.__set_file_size( + static_cast(std::filesystem::file_size(file_path))); + return options; +} + +void set_table_level_row_count(SplitReadOptions* split_options, int64_t row_count) { + split_options->current_range.__isset.table_format_params = true; + split_options->current_range.table_format_params.__isset.table_level_row_count = true; + split_options->current_range.table_format_params.table_level_row_count = row_count; +} + +int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) { + return column_metadata.has_dictionary_page() + ? static_cast(column_metadata.dictionary_page_offset()) + : static_cast(column_metadata.data_page_offset()); +} + +SplitReadOptions build_split_options_for_row_group_mid(const std::string& file_path, + int row_group_idx) { + auto options = build_split_options(file_path); + auto reader = ::parquet::ParquetFileReader::OpenFile(file_path, false); + auto metadata = reader->metadata(); + auto row_group_metadata = metadata->RowGroup(row_group_idx); + auto first_column = row_group_metadata->ColumnChunk(0); + auto last_column = row_group_metadata->ColumnChunk(row_group_metadata->num_columns() - 1); + const int64_t row_group_start_offset = parquet_column_start_offset(*first_column); + const int64_t row_group_end_offset = + parquet_column_start_offset(*last_column) + last_column->total_compressed_size(); + const int64_t row_group_mid_offset = + row_group_start_offset + (row_group_end_offset - row_group_start_offset) / 2; + options.current_range.__set_start_offset(row_group_mid_offset); + options.current_range.__set_size(1); + return options; +} + +DataTypePtr make_table_test_type(const DataTypePtr& type, bool nullable_root = true) { + DORIS_CHECK(type != nullptr); + const auto nested_type = remove_nullable(type); + DataTypePtr result; + if (const auto* struct_type = typeid_cast(nested_type.get())) { + DataTypes child_types; + child_types.reserve(struct_type->get_elements().size()); + for (const auto& child_type : struct_type->get_elements()) { + child_types.push_back(make_table_test_type(child_type)); + } + result = std::make_shared(child_types, struct_type->get_element_names()); + } else if (const auto* array_type = typeid_cast(nested_type.get())) { + result = std::make_shared( + make_table_test_type(array_type->get_nested_type())); + } else if (const auto* map_type = typeid_cast(nested_type.get())) { + result = std::make_shared(make_table_test_type(map_type->get_key_type()), + make_table_test_type(map_type->get_value_type())); + } else { + result = nested_type; + } + return nullable_root ? make_nullable(result) : result; +} + +ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition column; + if (id >= 0) { + column.identifier = Field::create_field(id); + } + column.name = name; + // TableReader tests model external table scan descriptors. Those table columns are nullable + // even when the Parquet file field itself is required, so keep the test schema aligned with + // the real scan contract at the construction boundary. + column.type = make_table_test_type(type); + return column; +} + +ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) { + ColumnDefinition field; + field.identifier = Field::create_field(id); + field.local_id = id; + field.name = name; + field.type = make_table_test_type(type); + return field; +} + +schema::external::TFieldPtr external_schema_field(std::string name, int32_t id, + std::vector aliases = {}) { + auto field = std::make_shared(); + field->__set_name(std::move(name)); + field->__set_id(id); + if (!aliases.empty()) { + field->__set_name_mapping(std::move(aliases)); + } + schema::external::TFieldPtr field_ptr; + field_ptr.field_ptr = std::move(field); + field_ptr.__isset.field_ptr = true; + return field_ptr; +} + +schema::external::TFieldPtr external_array_field(std::string name, int32_t id, + schema::external::TFieldPtr item_field, + std::vector aliases = {}) { + auto field = external_schema_field(std::move(name), id, std::move(aliases)); + schema::external::TArrayField array_field; + array_field.__set_item_field(std::move(item_field)); + field.field_ptr->nestedField.__set_array_field(std::move(array_field)); + field.field_ptr->__isset.nestedField = true; + return field; +} + +schema::external::TFieldPtr external_map_field(std::string name, int32_t id, + schema::external::TFieldPtr key_field, + schema::external::TFieldPtr value_field, + std::vector aliases = {}) { + auto field = external_schema_field(std::move(name), id, std::move(aliases)); + schema::external::TMapField map_field; + map_field.__set_key_field(std::move(key_field)); + map_field.__set_value_field(std::move(value_field)); + field.field_ptr->nestedField.__set_map_field(std::move(map_field)); + field.field_ptr->__isset.nestedField = true; + return field; +} + +schema::external::TFieldPtr external_struct_field(std::string name, int32_t id, + std::vector fields, + std::vector aliases = {}) { + auto field = external_schema_field(std::move(name), id, std::move(aliases)); + schema::external::TStructField struct_field; + struct_field.__set_fields(std::move(fields)); + field.field_ptr->nestedField.__set_struct_field(std::move(struct_field)); + field.field_ptr->__isset.nestedField = true; + return field; +} + +schema::external::TSchema external_schema(int64_t schema_id, + std::vector fields) { + schema::external::TStructField root_field; + root_field.__set_fields(std::move(fields)); + schema::external::TSchema schema; + schema.__set_schema_id(schema_id); + schema.__set_root_field(std::move(root_field)); + return schema; +} + +ColumnDefinition make_nullable_column_definition(ColumnDefinition column) { + column.type = make_table_test_type(column.type); + for (auto& child : column.children) { + child = make_nullable_column_definition(std::move(child)); + } + return column; +} + +MutableColumnPtr make_not_null_nullable_column(MutableColumnPtr nested_column) { + auto null_map = ColumnUInt8::create(); + for (size_t i = 0; i < nested_column->size(); ++i) { + null_map->insert_value(0); + } + return ColumnNullable::create(std::move(nested_column), std::move(null_map)); +} + +class TableReaderCharVarcharTestHelper final : public TableReader { +public: + using TableReader::_should_truncate_char_or_varchar_column; + using TableReader::_truncate_char_or_varchar_column; +}; + +TEST(TableReaderTest, TruncateCharOrVarcharPredicateOnlyAppliesToParquetStringWidthMismatch) { + ColumnMapping mapping; + mapping.table_type = std::make_shared(3, TYPE_VARCHAR); + mapping.file_type = std::make_shared(10, TYPE_VARCHAR); + EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping)); + + mapping.file_type = std::make_shared(2, TYPE_VARCHAR); + EXPECT_FALSE( + TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping)); + + mapping.file_type = std::make_shared(); + EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping)); + + mapping.file_type = std::make_shared(); + EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping)); + + mapping.table_type = std::make_shared(); + EXPECT_FALSE( + TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping)); +} + +TEST(TableReaderTest, TruncateCharOrVarcharColumnKeepsNullMap) { + auto nested = ColumnString::create(); + nested->insert_data("abcdef", 6); + nested->insert_data("xyz", 3); + auto null_map = ColumnUInt8::create(); + null_map->insert_value(0); + null_map->insert_value(1); + + auto type = make_nullable(std::make_shared(3, TYPE_VARCHAR)); + Block block; + block.insert({ColumnNullable::create(std::move(nested), std::move(null_map)), type, "v"}); + + TableReaderCharVarcharTestHelper::_truncate_char_or_varchar_column(&block, 0, 3); + + ASSERT_EQ(block.columns(), 1); + ASSERT_EQ(block.rows(), 2); + const auto* nullable_column = + assert_cast(block.get_by_position(0).column.get()); + EXPECT_EQ(nullable_column->get_nested_column().get_data_at(0).to_string(), "abc"); + EXPECT_FALSE(nullable_column->is_null_at(0)); + EXPECT_TRUE(nullable_column->is_null_at(1)); +} + +void set_name_identifiers(std::vector* columns); + +void set_name_identifier(ColumnDefinition* column) { + DORIS_CHECK(column != nullptr); + column->identifier = Field::create_field(column->name); + set_name_identifiers(&column->children); +} + +void set_name_identifiers(std::vector* columns) { + DORIS_CHECK(columns != nullptr); + for (auto& column : *columns) { + set_name_identifier(&column); + } +} + +void add_column_predicate(TableColumnPredicates* column_predicates, GlobalIndex global_index, + std::shared_ptr predicate) { + auto& entry = (*column_predicates)[global_index]; + entry.push_back(std::move(predicate)); +} + +VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) { + auto ctx = VExprContext::create_shared(expr); + auto status = ctx->prepare(state, RowDescriptor()); + EXPECT_TRUE(status.ok()) << status; + status = ctx->open(state); + EXPECT_TRUE(status.ok()) << status; + return ctx; +} + +struct FakeFileReaderState { + int init_count = 0; + int open_count = 0; + int close_count = 0; + int64_t total_rows = 2; + bool eof_with_first_batch = true; + bool inject_delete_conjunct = false; + std::shared_ptr last_request; + std::shared_ptr condition_cache_ctx; +}; + +class FakeFileReader final : public FileReader { +public: + FakeFileReader(std::shared_ptr& system_properties, + std::unique_ptr& file_description, + std::vector schema, std::shared_ptr state) + : FileReader(system_properties, file_description, nullptr, nullptr), + _schema(std::move(schema)), + _state(std::move(state)) {} + + Status init(RuntimeState* state) override { + (void)state; + ++_state->init_count; + _eof = false; + return Status::OK(); + } + + Status get_schema(std::vector* file_schema) const override { + DORIS_CHECK(file_schema != nullptr); + *file_schema = _schema; + for (auto& column : *file_schema) { + column = make_nullable_column_definition(std::move(column)); + } + return Status::OK(); + } + + Status open(std::shared_ptr request) override { + RETURN_IF_ERROR(FileReader::open(std::move(request))); + _state->last_request = _request; + ++_state->open_count; + _returned_batch = false; + return Status::OK(); + } + + Status get_block(Block* file_block, size_t* rows, bool* eof) override { + DORIS_CHECK(file_block != nullptr); + DORIS_CHECK(rows != nullptr); + DORIS_CHECK(eof != nullptr); + DORIS_CHECK(_request != nullptr); + if (_returned_batch) { + *rows = 0; + *eof = true; + return Status::OK(); + } + + for (const auto& [file_column_id, block_position] : _request->local_positions) { + if (file_column_id == LocalColumnId(0)) { + auto column = ColumnInt32::create(); + column->insert_value(1); + column->insert_value(2); + file_block->replace_by_position(block_position.value(), + make_not_null_nullable_column(std::move(column))); + } else if (file_column_id == LocalColumnId(1)) { + auto column = ColumnString::create(); + column->insert_data("one", 3); + column->insert_data("two", 3); + file_block->replace_by_position(block_position.value(), + make_not_null_nullable_column(std::move(column))); + } else if (file_column_id == LocalColumnId(2)) { + auto country_values = ColumnString::create(); + country_values->insert_data("USA", 3); + country_values->insert_data("UK", 2); + auto country_column = make_not_null_nullable_column(std::move(country_values)); + + auto city_column = ColumnString::create(); + city_column->insert_data("New York", 8); + city_column->insert_data("London", 6); + + MutableColumns struct_children; + struct_children.push_back(std::move(country_column)); + struct_children.push_back(make_not_null_nullable_column(std::move(city_column))); + auto struct_column = ColumnStruct::create(std::move(struct_children)); + + file_block->replace_by_position( + block_position.value(), + make_not_null_nullable_column(std::move(struct_column))); + } else { + return Status::InvalidArgument("Unexpected fake file column id {}", + file_column_id.value()); + } + } + + _returned_batch = true; + *rows = 2; + *eof = _state->eof_with_first_batch; + if (_state->condition_cache_ctx != nullptr && !_state->condition_cache_ctx->is_hit && + _state->condition_cache_ctx->filter_result != nullptr && + !_state->condition_cache_ctx->filter_result->empty()) { + // The real file reader marks a granule after local row-level predicates keep at least + // one row from that granule. The fake reader does it here so TableReader tests can + // focus on condition-cache lifecycle decisions without depending on Parquet internals. + (*_state->condition_cache_ctx->filter_result)[0] = true; + } + return Status::OK(); + } + + void set_condition_cache_context(std::shared_ptr ctx) override { + _state->condition_cache_ctx = std::move(ctx); + } + + int64_t get_total_rows() const override { return _state->total_rows; } + + Status close() override { + ++_state->close_count; + _request.reset(); + _eof = true; + return Status::OK(); + } + +private: + std::vector _schema; + std::shared_ptr _state; + bool _returned_batch = false; +}; + +class FakeTableReader final : public TableReader { +public: + FakeTableReader(std::vector file_schema, + std::shared_ptr state) + : _file_schema(std::move(file_schema)), _state(std::move(state)) {} + +protected: + Status create_file_reader(std::unique_ptr* reader) override { + DORIS_CHECK(reader != nullptr); + auto system_properties = std::make_shared(); + system_properties->system_type = TFileType::FILE_LOCAL; + auto file_description = std::make_unique(); + file_description->path = "fake-table-reader-input"; + *reader = std::make_unique(system_properties, file_description, + _file_schema, _state); + return Status::OK(); + } + + Status customize_file_scan_request(FileScanRequest* file_request) override { + RETURN_IF_ERROR(TableReader::customize_file_scan_request(file_request)); + if (_state->inject_delete_conjunct) { + // Table-format delete handling is represented in v2 by TableReader injecting + // delete_conjuncts into the file scan request. The fake reader does not execute it; + // this only tests that condition cache is disabled once such table-level delete state + // is present in the request. + file_request->delete_conjuncts.push_back( + VExprContext::create_shared(table_int32_literal(1))); + } + return Status::OK(); + } + +private: + std::vector _file_schema; + std::shared_ptr _state; +}; + +class ScopedConditionCacheForTest { +public: + ScopedConditionCacheForTest() + : _previous(ExecEnv::GetInstance()->get_condition_cache()), + _cache(segment_v2::ConditionCache::create_global_cache(1024 * 1024, 4)) { + ExecEnv::GetInstance()->_condition_cache = _cache.get(); + } + + ~ScopedConditionCacheForTest() { ExecEnv::GetInstance()->_condition_cache = _previous; } + + segment_v2::ConditionCache* get() { return _cache.get(); } + +private: + segment_v2::ConditionCache* _previous = nullptr; + std::unique_ptr _cache; +}; + +TEST(TableReaderTest, CanUseInjectedFileReaderForStandaloneUnitTest) { + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + file_schema.push_back(make_file_column(1, "value", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(1, "value", std::make_shared())); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + set_name_identifiers(&projected_columns); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_FALSE(eos); + + ASSERT_EQ(fake_state->init_count, 1); + ASSERT_EQ(fake_state->open_count, 1); + ASSERT_EQ(fake_state->close_count, 1); + ASSERT_NE(fake_state->last_request, nullptr); + ASSERT_EQ(fake_state->last_request->local_positions.at(LocalColumnId(1)).value(), 0); + ASSERT_EQ(fake_state->last_request->local_positions.at(LocalColumnId(0)).value(), 1); + EXPECT_EQ(projection_ids(fake_state->last_request->non_predicate_columns), + std::vector({1, 0})); + EXPECT_TRUE(fake_state->last_request->predicate_columns.empty()); + + const auto& value_column = + assert_cast(expect_not_null_table_column(block, 0)); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 1)); + ASSERT_EQ(block.rows(), 2); + EXPECT_EQ(value_column.get_data_at(0).to_string(), "one"); + EXPECT_EQ(value_column.get_data_at(1).to_string(), "two"); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 2); + + block = build_table_block(projected_columns); + eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); +} + +TEST(TableReaderTest, DebugStringCoversReaderStateAndEnumNames) { + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + file_schema.push_back(make_file_column(1, "value", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + projected_columns.push_back(make_table_column(1, "value", std::make_shared())); + projected_columns[0].name_mapping = {"legacy_id"}; + set_name_identifiers(&projected_columns); + + TableColumnPredicates column_predicates; + add_column_predicate(&column_predicates, GlobalIndex(0), + create_comparison_predicate( + 0, "id", make_nullable(std::make_shared()), + Field::create_field(0), false)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + fake_state->eof_with_first_batch = false; + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = std::move(column_predicates), + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 0))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = std::make_shared(), + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + + SplitReadOptions split_options; + split_options.partition_values.emplace("dt", Field::create_field("2026-06-29")); + split_options.current_range.__set_path("fake-table-reader-input"); + split_options.current_range.__set_file_size(64); + split_options.current_range.__set_start_offset(7); + split_options.current_range.__set_size(11); + split_options.current_range.__set_modification_time(13); + split_options.current_range.__set_fs_name("local-fs"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto debug = reader.debug_string(); + EXPECT_NE(debug.find("format=PARQUET"), std::string::npos); + EXPECT_NE(debug.find("push_down_agg_type=COUNT"), std::string::npos); + EXPECT_NE(debug.find("current_file=FileDescription{path=fake-table-reader-input"), + std::string::npos); + EXPECT_NE(debug.find("partition_values={dt}"), std::string::npos); + EXPECT_NE(debug.find("table_filters=[TableFilter{conjunct=VExprContext"), std::string::npos); + EXPECT_NE(debug.find("table_column_predicates={0:{predicate_count=1}}"), std::string::npos); + EXPECT_NE(debug.find("ColumnDefinition{name=id"), std::string::npos); + EXPECT_NE(debug.find("name_mapping=[legacy_id]"), std::string::npos); + EXPECT_NE(debug.find("ColumnMapping{global_index=0"), std::string::npos); + EXPECT_NE(debug.find("FileBlockColumn{file_column_id=0"), std::string::npos); + ASSERT_TRUE(reader.close().ok()); + + const std::vector formats {FileFormat::ORC, FileFormat::CSV, FileFormat::JSON, + FileFormat::TEXT, FileFormat::JNI, FileFormat::NATIVE, + FileFormat::ARROW}; + const std::vector format_names {"ORC", "CSV", "JSON", "TEXT", + "JNI", "NATIVE", "ARROW"}; + for (size_t idx = 0; idx < formats.size(); ++idx) { + TableReader enum_reader; + ASSERT_TRUE(enum_reader + .init({ + .projected_columns = {}, + .column_predicates = {}, + .conjuncts = {}, + .format = formats[idx], + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + EXPECT_NE(enum_reader.debug_string().find("format=" + format_names[idx]), + std::string::npos); + } + + const std::vector agg_ops {TPushAggOp::type::NONE, TPushAggOp::type::MINMAX, + TPushAggOp::type::MIX, + TPushAggOp::type::COUNT_ON_INDEX}; + const std::vector agg_names {"NONE", "MINMAX", "MIX", "COUNT_ON_INDEX"}; + for (size_t idx = 0; idx < agg_ops.size(); ++idx) { + TableReader enum_reader; + ASSERT_TRUE(enum_reader + .init({ + .projected_columns = {}, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = agg_ops[idx], + }) + .ok()); + EXPECT_NE(enum_reader.debug_string().find("push_down_agg_type=" + agg_names[idx]), + std::string::npos); + } +} + +TEST(TableReaderTest, AnnotateProjectedColumnUsesCurrentHistorySchemaForNestedTypes) { + TFileScanRangeParams scan_params; + scan_params.__set_current_schema_id(200); + + auto profile_field = external_struct_field( + "profile", 20, + {external_array_field("old_scores", 21, external_schema_field("old_score", 22), + {"scores"}), + external_map_field("old_props", 23, external_schema_field("old_key", 24), + external_schema_field("old_value", 25), {"props"})}, + {"user_profile"}); + scan_params.__set_history_schema_info( + {external_schema(100, {external_schema_field("ignored_profile", 10)}), + external_schema(200, {profile_field})}); + + const auto int_type = std::make_shared(); + const auto string_type = std::make_shared(); + auto scores_type = std::make_shared(int_type); + auto props_type = std::make_shared(string_type, string_type); + auto profile_type = std::make_shared(DataTypes {scores_type, props_type}, + Strings {"scores", "props"}); + + ColumnDefinition profile_column = make_table_column(-1, "user_profile", profile_type); + ProjectedColumnBuildContext context; + context.scan_params = &scan_params; + TFileScanSlotInfo slot_info; + TableReader reader; + ASSERT_TRUE(reader.annotate_projected_column(slot_info, &context, &profile_column).ok()); + + EXPECT_EQ(profile_column.get_identifier_field_id(), 20); + EXPECT_EQ(profile_column.name_mapping, std::vector({"user_profile"})); + ASSERT_TRUE(context.schema_column.has_value()); + ASSERT_EQ(context.schema_column->children.size(), 2); + EXPECT_EQ(context.schema_column->children[0].name, "old_scores"); + EXPECT_EQ(context.schema_column->children[0].get_identifier_field_id(), 21); + ASSERT_EQ(context.schema_column->children[0].children.size(), 1); + EXPECT_EQ(context.schema_column->children[0].children[0].name, "element"); + EXPECT_EQ(context.schema_column->children[0].children[0].get_identifier_field_id(), 22); + ASSERT_EQ(context.schema_column->children[1].children.size(), 2); + EXPECT_EQ(context.schema_column->children[1].name, "old_props"); + EXPECT_EQ(context.schema_column->children[1].children[0].name, "key"); + EXPECT_EQ(context.schema_column->children[1].children[0].get_identifier_field_id(), 24); + EXPECT_EQ(context.schema_column->children[1].children[1].name, "value"); + EXPECT_EQ(context.schema_column->children[1].children[1].get_identifier_field_id(), 25); +} + +TEST(TableReaderTest, ComplexRematerializeCastsScalarChildToTableType) { + const auto string_type = std::make_shared(); + const auto nullable_string_type = make_nullable(string_type); + const auto file_struct_type = make_nullable(std::make_shared( + DataTypes {nullable_string_type, string_type}, Strings {"country", "city"})); + auto file_struct_column = make_file_column(2, "struct_column", file_struct_type); + file_struct_column.children = {make_file_column(0, "country", nullable_string_type), + make_file_column(1, "city", string_type)}; + std::vector file_schema = {file_struct_column}; + + const auto table_struct_type = make_nullable(std::make_shared( + DataTypes {nullable_string_type, nullable_string_type}, Strings {"country", "city"})); + auto country_child = make_table_column(0, "country", nullable_string_type); + auto city_child = make_table_column(1, "city", nullable_string_type); + auto table_struct_column = make_table_column(2, "struct_column", table_struct_type); + table_struct_column.children = {country_child, city_child}; + std::vector projected_columns = {table_struct_column}; + set_name_identifiers(&projected_columns); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + const auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status.to_string(); + ASSERT_FALSE(eos); + ASSERT_TRUE(block.check_type_and_column().ok()) << block.dump_structure(); + + const auto& result_nullable = + assert_cast(*block.get_by_position(0).column); + const auto& struct_result = + assert_cast(result_nullable.get_nested_column()); + ASSERT_EQ(struct_result.get_columns().size(), 2); + const auto& country_column = assert_cast(struct_result.get_column(0)); + const auto& city_column = assert_cast(struct_result.get_column(1)); + const auto& country_values = + assert_cast(country_column.get_nested_column()); + const auto& city_values = assert_cast(city_column.get_nested_column()); + ASSERT_EQ(city_column.size(), 2); + EXPECT_FALSE(city_column.is_null_at(0)); + EXPECT_FALSE(city_column.is_null_at(1)); + EXPECT_EQ(country_values.get_data_at(0).to_string(), "USA"); + EXPECT_EQ(country_values.get_data_at(1).to_string(), "UK"); + EXPECT_EQ(city_values.get_data_at(0).to_string(), "New York"); + EXPECT_EQ(city_values.get_data_at(1).to_string(), "London"); +} + +TEST(TableReaderTest, ReopenSplitAfterClose) { + const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const std::vector file_paths = { + (test_dir / "split_1.parquet").string(), + (test_dir / "split_2.parquet").string(), + (test_dir / "split_3.parquet").string(), + }; + write_parquet_file(file_paths[0], 1, "one"); + write_parquet_file(file_paths[1], 2, "two"); + write_parquet_file(file_paths[2], 3, "three"); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(1, "value", std::make_shared())); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(1, 1, 0))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + // Simulate the scanner lifecycle for three different splits: + // init() once, then repeat prepare_split() -> get_block() -> close(). + // This verifies TableReader::close() fully releases the previous low-level reader and task + // state, so a later prepare_split() can open and read a new split on the same TableReader. + // The table-level conjunct is also rebuilt for each split. The projection order puts value + // before id, so the pushed conjunct has to be rewritten to the ParquetReader file-local block + // position every time a new split is opened. + std::vector ids; + std::vector values; + for (const auto& file_path : file_paths) { + auto split_options = build_split_options(file_path); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& value_column = + assert_cast(expect_not_null_table_column(block, 0)); + const auto& id_column = + assert_cast(expect_not_null_table_column(block, 1)); + ASSERT_EQ(id_column.size(), 1); + ASSERT_EQ(value_column.size(), 1); + ids.push_back(id_column.get_element(0)); + values.push_back(value_column.get_data_at(0).to_string()); + + ASSERT_TRUE(reader.close().ok()); + } + + EXPECT_EQ(ids, std::vector({1, 2, 3})); + EXPECT_EQ(values, std::vector({"one", "two", "three"})); + + std::filesystem::remove_all(test_dir); +} + +// Scenario: column predicates are pruning hints only. They do not produce a row-level survivor +// bitmap, so TableReader must not enable condition cache when the scan request has no conjuncts. +TEST(TableReaderTest, ConditionCacheSkipsColumnPredicateOnlyRequest) { + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + set_name_identifiers(&projected_columns); + + TableColumnPredicates column_predicates; + add_column_predicate(&column_predicates, GlobalIndex(0), + create_comparison_predicate( + 0, "id", make_nullable(std::make_shared()), + Field::create_field(0), false)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = std::move(column_predicates), + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .condition_cache_digest = 7, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_EQ(fake_state->condition_cache_ctx, nullptr); + EXPECT_EQ(reader.condition_cache_hit_count(), 0); + ASSERT_TRUE(reader.close().ok()); +} + +// Scenario: runtime filters can arrive late and are not represented by the stable predicate digest. +// A MISS must not insert a bitmap for `stable predicate AND runtime filter` under the stable digest. +TEST(TableReaderTest, ConditionCacheSkipsRuntimeFilterConjunct) { + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + set_name_identifiers(&projected_columns); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE( + reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, runtime_filter_wrapper_expr( + table_int32_greater_than_expr(0, 0, 0)))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .condition_cache_digest = 7, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_EQ(fake_state->condition_cache_ctx, nullptr); + EXPECT_EQ(reader.condition_cache_hit_count(), 0); + ASSERT_TRUE(reader.close().ok()); +} + +// Scenario: table-format delete files/deletion vectors are outside the data-file cache key. When +// TableReader injects delete conjuncts into the file scan request, condition cache must be disabled +// for that split. +TEST(TableReaderTest, ConditionCacheSkipsRequestWithDeleteConjuncts) { + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + set_name_identifiers(&projected_columns); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + fake_state->inject_delete_conjunct = true; + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 0))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .condition_cache_digest = 7, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_EQ(fake_state->condition_cache_ctx, nullptr); + EXPECT_EQ(reader.condition_cache_hit_count(), 0); + ASSERT_TRUE(reader.close().ok()); +} + +// Scenario: a MISS bitmap is safe to publish only after the physical reader reaches EOF. This test +// returns EOF together with the first batch and verifies TableReader publishes the marked bitmap. +TEST(TableReaderTest, ConditionCacheMissPublishesBitmapAfterReaderEof) { + ScopedConditionCacheForTest cache; + + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + set_name_identifiers(&projected_columns); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + fake_state->total_rows = ConditionCacheContext::GRANULE_SIZE; + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 0))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .condition_cache_digest = 7, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_NE(fake_state->condition_cache_ctx, nullptr); + EXPECT_FALSE(fake_state->condition_cache_ctx->is_hit); + + segment_v2::ConditionCache::ExternalCacheKey key("fake-table-reader-input", 0, -1, 7, 0, -1); + segment_v2::ConditionCacheHandle handle; + ASSERT_TRUE(cache.get()->lookup(key, &handle)); + const auto cached_bitmap = handle.get_filter_result(); + ASSERT_NE(cached_bitmap, nullptr); + ASSERT_FALSE(cached_bitmap->empty()); + EXPECT_TRUE((*cached_bitmap)[0]); + + ASSERT_TRUE(reader.close().ok()); +} + +// Scenario: LIMIT/cancel can close a reader before it reaches EOF. TableReader must drop the MISS +// bitmap because unvisited granules would still be false and unsafe for future cache hits. +TEST(TableReaderTest, ConditionCacheMissIsDroppedWhenReaderClosesBeforeEof) { + ScopedConditionCacheForTest cache; + + std::vector file_schema; + file_schema.push_back(make_file_column(0, "id", std::make_shared())); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + set_name_identifiers(&projected_columns); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + auto fake_state = std::make_shared(); + fake_state->total_rows = ConditionCacheContext::GRANULE_SIZE; + fake_state->eof_with_first_batch = false; + FakeTableReader reader(file_schema, fake_state); + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 0))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .condition_cache_digest = 7, + }) + .ok()); + + SplitReadOptions split_options; + split_options.current_range.__set_path("fake-table-reader-input"); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_NE(fake_state->condition_cache_ctx, nullptr); + EXPECT_FALSE(fake_state->condition_cache_ctx->is_hit); + + ASSERT_TRUE(reader.close().ok()); + segment_v2::ConditionCache::ExternalCacheKey key("fake-table-reader-input", 0, -1, 7, 0, -1); + segment_v2::ConditionCacheHandle handle; + EXPECT_FALSE(cache.get()->lookup(key, &handle)); +} + +TEST(TableReaderTest, PushDownCountFromNewParquetReader) { + const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_count_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50}, + {"one", "two", "three", "four", "five"}, 2); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 5); + EXPECT_FALSE(is_column_const(*block.get_by_position(0).column)); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, TableLevelCountUsesAssignedRowCount) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_table_count_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + TQueryOptions query_options; + query_options.__set_batch_size(2); + RuntimeState state {query_options, TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + auto split_options = build_split_options(file_path); + set_table_level_row_count(&split_options, 5); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + EXPECT_EQ(block.rows(), 2); + + block = build_table_block(projected_columns); + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + EXPECT_EQ(block.rows(), 2); + + block = build_table_block(projected_columns); + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + EXPECT_EQ(block.rows(), 1); + + block = build_table_block(projected_columns); + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); + EXPECT_EQ(block.rows(), 0); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownMinMaxFromNewParquetReader) { + const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_minmax_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {3, 1, 5, 2}, {30, 10, 50, 20}, + {"three", "one", "five", "two"}, 2); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + projected_columns.push_back(make_table_column(1, "score", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + const auto& score_column = + assert_cast(expect_not_null_table_column(block, 1)); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 5); + EXPECT_EQ(score_column.get_element(0), 10); + EXPECT_EQ(score_column.get_element(1), 50); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownMinMaxCastsFileValueToTableType) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_minmax_cast_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {3, 1, 5, 2}, {30, 10, 50, 20}, + {"three", "one", "five", "two"}, 2); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status; + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 5); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownMinMaxFromProjectedStructLeaf) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_minmax_struct_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_struct_parquet_file(file_path, {3, 1, 5, 2}, 2); + + const auto int_type = std::make_shared(); + auto id_child = make_table_column(0, "id", int_type); + auto struct_type = std::make_shared(DataTypes {int_type}, Strings {"id"}); + auto struct_column = make_table_column(100, "s", struct_type); + struct_column.children = {id_child}; + std::vector projected_columns = {struct_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status; + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& struct_result = + assert_cast(expect_not_null_table_column(block, 0)); + ASSERT_EQ(struct_result.get_columns().size(), 1); + const auto& ids = assert_cast( + expect_not_null_nullable_nested_column(struct_result.get_column(0))); + EXPECT_EQ(ids.get_element(0), 1); + EXPECT_EQ(ids.get_element(1), 5); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownMinMaxFallsBackForProjectedListStructLeaf) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_minmax_list_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_list_struct_parquet_file(file_path); + + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + auto element_type = std::make_shared( + DataTypes {nullable_int_type, nullable_int_type}, Strings {"a", "b"}); + auto nullable_element_type = make_nullable(element_type); + auto list_column = + make_table_column(100, "xs", std::make_shared(nullable_element_type)); + std::vector projected_columns = {list_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status; + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + const auto& array_result = + assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(array_result.get_offsets()[0], 2); + EXPECT_EQ(array_result.get_offsets()[1], 3); + EXPECT_EQ(array_result.get_offsets()[2], 4); + const auto& nullable_elements = assert_cast(array_result.get_data()); + for (const auto is_null : nullable_elements.get_null_map_data()) { + EXPECT_EQ(is_null, 0); + } + const auto& element_struct = + assert_cast(nullable_elements.get_nested_column()); + ASSERT_EQ(element_struct.get_columns().size(), 2); + const auto& a_values = assert_cast( + expect_not_null_nullable_nested_column(element_struct.get_column(0))); + EXPECT_EQ(a_values.get_element(0), 10); + EXPECT_EQ(a_values.get_element(1), 20); + EXPECT_EQ(a_values.get_element(2), 30); + EXPECT_EQ(a_values.get_element(3), 40); + const auto& b_values = assert_cast( + expect_not_null_nullable_nested_column(element_struct.get_column(1))); + EXPECT_EQ(b_values.get_element(0), 11); + EXPECT_EQ(b_values.get_element(1), 21); + EXPECT_EQ(b_values.get_element(2), 31); + EXPECT_EQ(b_values.get_element(3), 41); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedListStructReadsSelectedElementChild) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_list_projection_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_list_struct_parquet_file(file_path); + + const auto int_type = std::make_shared(); + auto a_child = make_table_column(0, "a", int_type); + auto element_type = std::make_shared(DataTypes {int_type}, Strings {"a"}); + auto nullable_element_type = make_nullable(element_type); + auto element_child = make_table_column(0, "element", nullable_element_type); + element_child.children = {a_child}; + auto list_column = + make_table_column(100, "xs", std::make_shared(nullable_element_type)); + list_column.children = {element_child}; + std::vector projected_columns = {list_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + const auto& array_result = + assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(array_result.get_offsets()[0], 2); + EXPECT_EQ(array_result.get_offsets()[1], 3); + EXPECT_EQ(array_result.get_offsets()[2], 4); + const auto& nullable_elements = assert_cast(array_result.get_data()); + const auto& element_struct = + assert_cast(nullable_elements.get_nested_column()); + ASSERT_EQ(element_struct.get_columns().size(), 1); + const auto& a_values = assert_cast( + expect_not_null_nullable_nested_column(element_struct.get_column(0))); + EXPECT_EQ(a_values.get_element(0), 10); + EXPECT_EQ(a_values.get_element(1), 20); + EXPECT_EQ(a_values.get_element(2), 30); + EXPECT_EQ(a_values.get_element(3), 40); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedListStructReordersRenamedAndMissingElementChildren) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_list_schema_evolution_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_list_struct_parquet_file(file_path); + + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + const auto string_type = std::make_shared(); + auto b_child = make_table_column(1, "renamed_b", nullable_int_type); + b_child.name_mapping = {"b"}; + auto missing_child = make_table_column(99, "missing_child", string_type); + auto a_child = make_table_column(0, "renamed_a", nullable_int_type); + a_child.name_mapping = {"a"}; + auto element_type = std::make_shared( + DataTypes {nullable_int_type, string_type, nullable_int_type}, + Strings {"renamed_b", "missing_child", "renamed_a"}); + auto nullable_element_type = make_nullable(element_type); + auto element_child = make_table_column(0, "element", nullable_element_type); + element_child.children = {b_child, missing_child, a_child}; + auto list_column = + make_table_column(100, "xs", std::make_shared(nullable_element_type)); + list_column.children = {element_child}; + std::vector projected_columns = {list_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + const auto& array_result = + assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(array_result.get_offsets()[0], 2); + EXPECT_EQ(array_result.get_offsets()[1], 3); + EXPECT_EQ(array_result.get_offsets()[2], 4); + const auto& nullable_elements = assert_cast(array_result.get_data()); + const auto& element_struct = + assert_cast(nullable_elements.get_nested_column()); + ASSERT_EQ(element_struct.get_columns().size(), 3); + const auto& b_values = assert_cast( + expect_not_null_nullable_nested_column(element_struct.get_column(0))); + const auto& missing_values = element_struct.get_column(1); + const auto& a_values = assert_cast( + expect_not_null_nullable_nested_column(element_struct.get_column(2))); + EXPECT_EQ(b_values.get_element(0), 11); + EXPECT_EQ(b_values.get_element(1), 21); + EXPECT_EQ(b_values.get_element(2), 31); + EXPECT_EQ(b_values.get_element(3), 41); + expect_nullable_column_all_null(missing_values); + EXPECT_EQ(a_values.get_element(0), 10); + EXPECT_EQ(a_values.get_element(1), 20); + EXPECT_EQ(a_values.get_element(2), 30); + EXPECT_EQ(a_values.get_element(3), 40); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +// Scenario: when every projected array-element struct child is missing/default-only, the reader +// still receives a full element projection and can materialize the default child without crashing. +TEST(TableReaderTest, ProjectedListStructOnlyMissingElementChildFallsBackToFullElement) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_list_only_missing_child_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_list_struct_parquet_file(file_path); + + const auto string_type = std::make_shared(); + auto missing_child = make_table_column(99, "missing_child", string_type); + auto element_type = + std::make_shared(DataTypes {string_type}, Strings {"missing_child"}); + auto nullable_element_type = make_nullable(element_type); + auto element_child = make_table_column(0, "element", nullable_element_type); + element_child.children = {missing_child}; + auto list_column = + make_table_column(100, "xs", std::make_shared(nullable_element_type)); + list_column.children = {element_child}; + std::vector projected_columns = {list_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + const auto& array_result = + assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(array_result.get_offsets()[0], 2); + EXPECT_EQ(array_result.get_offsets()[1], 3); + EXPECT_EQ(array_result.get_offsets()[2], 4); + const auto& nullable_elements = assert_cast(array_result.get_data()); + const auto& element_struct = + assert_cast(nullable_elements.get_nested_column()); + ASSERT_EQ(element_struct.get_columns().size(), 1); + expect_nullable_column_all_null(element_struct.get_column(0)); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownMinMaxFallsBackForProjectedMapValueStructLeaf) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_minmax_map_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_map_struct_parquet_file(file_path); + + const auto key_type = std::make_shared(); + const auto string_type = std::make_shared(); + const auto nullable_string_type = make_nullable(string_type); + auto b_child = make_table_column(1, "b", nullable_string_type); + auto value_type = + std::make_shared(DataTypes {nullable_string_type}, Strings {"b"}); + auto nullable_value_type = make_nullable(value_type); + auto value_child = make_table_column(1, "value", nullable_value_type); + value_child.children = {b_child}; + auto map_column = make_table_column( + 100, "kv", std::make_shared(key_type, nullable_value_type)); + map_column.children = {value_child}; + std::vector projected_columns = {map_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + const auto& map_result = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(map_result.get_offsets()[0], 2); + EXPECT_EQ(map_result.get_offsets()[1], 3); + EXPECT_EQ(map_result.get_offsets()[2], 3); + const auto& keys = assert_cast( + expect_not_null_nullable_nested_column(map_result.get_keys())); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 3); + const auto& nullable_values = assert_cast(map_result.get_values()); + for (const auto is_null : nullable_values.get_null_map_data()) { + EXPECT_EQ(is_null, 0); + } + const auto& value_struct = + assert_cast(nullable_values.get_nested_column()); + ASSERT_EQ(value_struct.get_columns().size(), 1); + const auto& b_values = assert_cast( + expect_not_null_nullable_nested_column(value_struct.get_column(0))); + EXPECT_EQ(b_values.get_data_at(0).to_string(), "ma"); + EXPECT_EQ(b_values.get_data_at(1).to_string(), "mb"); + EXPECT_EQ(b_values.get_data_at(2).to_string(), "mc"); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedMapValueStructReordersRenamedAndMissingChildren) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_map_schema_evolution_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_map_struct_parquet_file(file_path); + + const auto key_type = std::make_shared(); + const auto int_type = std::make_shared(); + const auto nullable_int_type = make_nullable(int_type); + const auto string_type = std::make_shared(); + const auto nullable_string_type = make_nullable(string_type); + auto b_child = make_table_column(1, "renamed_b", nullable_string_type); + b_child.name_mapping = {"b"}; + auto missing_child = make_table_column(99, "missing_child", string_type); + auto a_child = make_table_column(0, "renamed_a", nullable_int_type); + a_child.name_mapping = {"a"}; + auto value_type = std::make_shared( + DataTypes {nullable_string_type, string_type, nullable_int_type}, + Strings {"renamed_b", "missing_child", "renamed_a"}); + auto nullable_value_type = make_nullable(value_type); + auto value_child = make_table_column(1, "value", nullable_value_type); + value_child.children = {b_child, missing_child, a_child}; + auto map_column = make_table_column( + 100, "kv", std::make_shared(key_type, nullable_value_type)); + map_column.children = {value_child}; + std::vector projected_columns = {map_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 3); + const auto& map_result = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(map_result.get_offsets()[0], 2); + EXPECT_EQ(map_result.get_offsets()[1], 3); + EXPECT_EQ(map_result.get_offsets()[2], 3); + const auto& keys = assert_cast( + expect_not_null_nullable_nested_column(map_result.get_keys())); + EXPECT_EQ(keys.get_element(0), 1); + EXPECT_EQ(keys.get_element(1), 2); + EXPECT_EQ(keys.get_element(2), 3); + const auto& nullable_values = assert_cast(map_result.get_values()); + const auto& value_struct = + assert_cast(nullable_values.get_nested_column()); + ASSERT_EQ(value_struct.get_columns().size(), 3); + const auto& b_values = assert_cast( + expect_not_null_nullable_nested_column(value_struct.get_column(0))); + const auto& missing_values = value_struct.get_column(1); + const auto& a_values = assert_cast( + expect_not_null_nullable_nested_column(value_struct.get_column(2))); + EXPECT_EQ(b_values.get_data_at(0).to_string(), "ma"); + EXPECT_EQ(b_values.get_data_at(1).to_string(), "mb"); + EXPECT_EQ(b_values.get_data_at(2).to_string(), "mc"); + expect_nullable_column_all_null(missing_values); + EXPECT_EQ(a_values.get_element(0), 10); + EXPECT_EQ(a_values.get_element(1), 20); + EXPECT_EQ(a_values.get_element(2), 30); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, MaterializeMapKeyStructReordersRenamedChildren) { + const auto int_type = std::make_shared(); + const auto string_type = std::make_shared(); + const auto file_key_type = + std::make_shared(DataTypes {int_type, string_type}, Strings {"a", "b"}); + const auto table_key_type = std::make_shared( + DataTypes {string_type, int_type}, Strings {"renamed_b", "renamed_a"}); + const auto file_map_type = std::make_shared(file_key_type, int_type); + const auto table_map_type = std::make_shared(table_key_type, int_type); + + ColumnMapping a_mapping; + a_mapping.table_column_name = "renamed_a"; + a_mapping.file_column_name = "a"; + a_mapping.file_local_id = 0; + a_mapping.table_type = int_type; + a_mapping.file_type = int_type; + a_mapping.is_trivial = true; + + ColumnMapping b_mapping; + b_mapping.table_column_name = "renamed_b"; + b_mapping.file_column_name = "b"; + b_mapping.file_local_id = 1; + b_mapping.table_type = string_type; + b_mapping.file_type = string_type; + b_mapping.is_trivial = true; + + ColumnMapping key_mapping; + key_mapping.table_column_name = "key"; + key_mapping.file_column_name = "key"; + key_mapping.file_local_id = 0; + key_mapping.table_type = table_key_type; + key_mapping.file_type = file_key_type; + key_mapping.is_trivial = false; + key_mapping.child_mappings = {b_mapping, a_mapping}; + + ColumnMapping value_mapping; + value_mapping.table_column_name = "value"; + value_mapping.file_column_name = "value"; + value_mapping.file_local_id = 1; + value_mapping.table_type = int_type; + value_mapping.file_type = int_type; + value_mapping.is_trivial = true; + + ColumnMapping map_mapping; + map_mapping.table_column_name = "kv"; + map_mapping.file_column_name = "kv"; + map_mapping.table_type = table_map_type; + map_mapping.file_type = file_map_type; + map_mapping.is_trivial = false; + map_mapping.child_mappings = {key_mapping, value_mapping}; + + auto a_keys = ColumnInt32::create(); + a_keys->insert_value(10); + a_keys->insert_value(20); + a_keys->insert_value(30); + auto b_keys = ColumnString::create(); + b_keys->insert_value("x"); + b_keys->insert_value("y"); + b_keys->insert_value("z"); + MutableColumns key_children; + key_children.push_back(std::move(a_keys)); + key_children.push_back(std::move(b_keys)); + auto key_column = ColumnStruct::create(std::move(key_children)); + + auto value_column = ColumnInt32::create(); + value_column->insert_value(100); + value_column->insert_value(200); + value_column->insert_value(300); + auto offsets_column = ColumnArray::ColumnOffsets::create(); + offsets_column->insert_value(2); + offsets_column->insert_value(3); + ColumnPtr file_column = ColumnMap::create(std::move(key_column), std::move(value_column), + std::move(offsets_column)); + + TableReaderMaterializeTestHelper reader; + ColumnPtr result_column; + ASSERT_TRUE(reader._materialize_map_mapping_column(map_mapping, file_column, 2, &result_column) + .ok()); + + const auto& result_map = assert_cast(*result_column); + EXPECT_EQ(result_map.get_offsets()[0], 2); + EXPECT_EQ(result_map.get_offsets()[1], 3); + const auto& result_key = assert_cast(result_map.get_keys()); + ASSERT_EQ(result_key.get_columns().size(), 2); + const auto& b_result = assert_cast(result_key.get_column(0)); + const auto& a_result = assert_cast(result_key.get_column(1)); + EXPECT_EQ(b_result.get_data_at(0).to_string(), "x"); + EXPECT_EQ(b_result.get_data_at(1).to_string(), "y"); + EXPECT_EQ(b_result.get_data_at(2).to_string(), "z"); + EXPECT_EQ(a_result.get_element(0), 10); + EXPECT_EQ(a_result.get_element(1), 20); + EXPECT_EQ(a_result.get_element(2), 30); + + const auto& result_value = assert_cast(result_map.get_values()); + EXPECT_EQ(result_value.get_element(0), 100); + EXPECT_EQ(result_value.get_element(1), 200); + EXPECT_EQ(result_value.get_element(2), 300); +} + +// Scenario: map value struct materialization follows DataTypeStruct field order even when +// ColumnMapping children arrive in a different order from projected ColumnDefinition children. +TEST(TableReaderTest, MaterializeMapValueStructUsesTableTypeOrder) { + const auto key_type = std::make_shared(); + const auto string_type = std::make_shared(); + const auto file_value_type = std::make_shared( + DataTypes {string_type, string_type}, Strings {"full_name", "gender"}); + const auto table_value_type = std::make_shared( + DataTypes {string_type, string_type}, Strings {"full_name", "gender"}); + const auto file_map_type = std::make_shared(key_type, file_value_type); + const auto table_map_type = std::make_shared(key_type, table_value_type); + + ColumnMapping full_name_mapping; + full_name_mapping.table_column_name = "full_name"; + full_name_mapping.file_column_name = "full_name"; + full_name_mapping.file_local_id = 0; + full_name_mapping.table_type = string_type; + full_name_mapping.file_type = string_type; + full_name_mapping.is_trivial = true; + + ColumnMapping gender_mapping; + gender_mapping.table_column_name = "gender"; + gender_mapping.file_column_name = "gender"; + gender_mapping.file_local_id = 1; + gender_mapping.table_type = string_type; + gender_mapping.file_type = string_type; + gender_mapping.is_trivial = true; + + ColumnMapping value_mapping; + value_mapping.table_column_name = "value"; + value_mapping.file_column_name = "value"; + value_mapping.file_local_id = 1; + value_mapping.table_type = table_value_type; + value_mapping.file_type = file_value_type; + value_mapping.is_trivial = false; + value_mapping.child_mappings = {gender_mapping, full_name_mapping}; + + ColumnMapping key_mapping; + key_mapping.table_column_name = "key"; + key_mapping.file_column_name = "key"; + key_mapping.file_local_id = 0; + key_mapping.table_type = key_type; + key_mapping.file_type = key_type; + key_mapping.is_trivial = true; + + ColumnMapping map_mapping; + map_mapping.table_column_name = "new_map_column"; + map_mapping.file_column_name = "new_map_column"; + map_mapping.table_type = table_map_type; + map_mapping.file_type = file_map_type; + map_mapping.is_trivial = false; + map_mapping.child_mappings = {key_mapping, value_mapping}; + + auto key_column = ColumnString::create(); + key_column->insert_value("person10"); + key_column->insert_value("person20"); + + auto full_name_column = ColumnString::create(); + full_name_column->insert_value("Jack"); + full_name_column->insert_value("James Lee"); + auto gender_column = ColumnString::create(); + gender_column->insert_value("Male"); + gender_column->insert_value("Male"); + MutableColumns value_children; + value_children.push_back(std::move(full_name_column)); + value_children.push_back(std::move(gender_column)); + auto value_column = ColumnStruct::create(std::move(value_children)); + + auto offsets_column = ColumnArray::ColumnOffsets::create(); + offsets_column->insert_value(1); + offsets_column->insert_value(2); + ColumnPtr file_column = ColumnMap::create(std::move(key_column), std::move(value_column), + std::move(offsets_column)); + + TableReaderMaterializeTestHelper reader; + ColumnPtr result_column; + ASSERT_TRUE(reader._materialize_map_mapping_column(map_mapping, file_column, 2, &result_column) + .ok()); + + const auto& result_map = assert_cast(*result_column); + const auto& result_value = assert_cast(result_map.get_values()); + ASSERT_EQ(result_value.get_columns().size(), 2); + const auto& full_name_result = assert_cast(result_value.get_column(0)); + const auto& gender_result = assert_cast(result_value.get_column(1)); + EXPECT_EQ(full_name_result.get_data_at(0).to_string(), "Jack"); + EXPECT_EQ(full_name_result.get_data_at(1).to_string(), "James Lee"); + EXPECT_EQ(gender_result.get_data_at(0).to_string(), "Male"); + EXPECT_EQ(gender_result.get_data_at(1).to_string(), "Male"); +} + +TEST(TableReaderTest, PushDownMinMaxOnlyUsesSelectedRowGroupInFileRange) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_minmax_range_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {10, 1, 100}, {100, 10, 1000}, {"ten", "one", "hundred"}, + 1); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 1)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 1); + EXPECT_EQ(id_column.get_element(1), 1); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownCountOnlyUsesSelectedRowGroupInFileRange) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_count_range_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 2)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 1); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownCountFallsBackWithTableConjunct) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_count_conjunct_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 2))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 1); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 3); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownCountFallsBackWithColumnPredicate) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_count_predicate_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + TableColumnPredicates column_predicates; + add_column_predicate(&column_predicates, GlobalIndex(0), + create_comparison_predicate( + 0, "id", make_nullable(std::make_shared()), + Field::create_field(2), false)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = std::move(column_predicates), + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::COUNT, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 1); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + EXPECT_EQ(id_column.get_element(0), 3); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, PushDownMinMaxFallsBackWithoutDirectFileMapping) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_minmax_missing_mapping_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + projected_columns.push_back( + make_table_column(99, "missing_id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + .push_down_agg_type = TPushAggOp::type::MINMAX, + }) + .ok()); + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 1); + expect_nullable_column_all_null(*block.get_by_position(0).column); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, OpenReaderBuildsTableFiltersFromConjuncts) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_conjunct_filter_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 3, "three"); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(1, "value", std::make_shared())); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(1, 1, 2))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + // open_reader() should convert the table-level conjunct on projected column id 1 into + // _table_filters before ColumnMapper creates the FileScanRequest. ColumnMapper then rewrites + // the conjunct's slot ref from table column id 1 to the file-local block position used by + // ParquetReader. The projection order intentionally puts value before id, so the id filter + // column is not at position 0 in the file block. + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 1)); + ASSERT_EQ(id_column.size(), 1); + EXPECT_EQ(id_column.get_element(0), 3); + + ASSERT_TRUE(reader.close().ok()); + + TableReader filtered_reader; + ASSERT_TRUE(filtered_reader + .init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(1, 1, 4))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + ASSERT_TRUE(filtered_reader.prepare_split(build_split_options(file_path)).ok()); + + block = build_table_block(projected_columns); + eos = false; + ASSERT_TRUE(filtered_reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); + EXPECT_EQ(block.get_by_position(1).column->size(), 0); + + ASSERT_TRUE(filtered_reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, OpenReaderBuildsColumnPredicateFilters) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_column_predicate_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + // ColumnPredicate is only used for row-group/statistics pruning. Keep one row per row + // group so the predicate can prune the first two row groups and leave only id = 3. + write_int_pair_parquet_file(file_path, {1, 2, 3}, {1, 5, 8}, {"one", "two", "three"}, 1); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(2, "value", std::make_shared())); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + TableColumnPredicates column_predicates; + add_column_predicate(&column_predicates, GlobalIndex(1), + create_comparison_predicate( + 0, "id", make_nullable(std::make_shared()), + Field::create_field(2), false)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = std::move(column_predicates), + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& value_column = + assert_cast(expect_not_null_table_column(block, 0)); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 1)); + ASSERT_EQ(id_column.size(), 1); + ASSERT_EQ(value_column.size(), 1); + EXPECT_EQ(id_column.get_element(0), 3); + EXPECT_EQ(value_column.get_data_at(0).to_string(), "three"); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ColumnPredicateSurvivesReopenSplit) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_predicate_reopen_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const std::vector file_paths = { + (test_dir / "split_1.parquet").string(), + (test_dir / "split_2.parquet").string(), + }; + write_int_pair_parquet_file(file_paths[0], {1, 3}, {10, 30}, {"one", "three"}, 1); + write_int_pair_parquet_file(file_paths[1], {2, 4}, {20, 40}, {"two", "four"}, 1); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + + TableColumnPredicates column_predicates; + add_column_predicate(&column_predicates, GlobalIndex(0), + create_comparison_predicate( + 0, "id", make_nullable(std::make_shared()), + Field::create_field(2), false)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = std::move(column_predicates), + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + std::vector ids; + for (const auto& file_path : file_paths) { + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + const auto& id_column = + assert_cast(expect_not_null_table_column(block, 0)); + ASSERT_EQ(id_column.size(), 1); + ids.push_back(id_column.get_element(0)); + + ASSERT_TRUE(reader.close().ok()); + } + + EXPECT_EQ(ids, std::vector({3, 4})); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, CreateScanRequestDeduplicatesSharedPredicateColumns) { + const auto int_type = std::make_shared(); + const std::vector projected_columns = { + make_table_column(0, "a", int_type), + make_table_column(1, "b", int_type), + make_table_column(2, "c", int_type), + make_table_column(3, "value", std::make_shared()), + }; + const std::vector file_schema = { + make_file_column(0, "a", int_type), + make_file_column(1, "b", int_type), + make_file_column(2, "c", int_type), + make_file_column(3, "value", std::make_shared()), + }; + + TableColumnMapper mapper; + ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok()); + + std::vector table_filters; + table_filters.push_back({ + // This test only needs the referenced global indices to drive predicate-column + // placement. Keep the conjunct empty so the assertion focuses on scan-column + // de-duplication rather than expression rewrite/prepare behavior. + .conjunct = nullptr, + .global_indices = {GlobalIndex(0), GlobalIndex(1)}, + }); + table_filters.push_back({ + .conjunct = nullptr, + .global_indices = {GlobalIndex(0), GlobalIndex(2)}, + }); + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request(table_filters, {}, projected_columns, &file_request).ok()); + + // Both filters reference column a. It must still be read once as a predicate column, and a + // predicate column must not be repeated as a non-predicate column. + EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector({0, 1, 2})); + EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector({3})); + ASSERT_EQ(file_request.local_positions.size(), 4); + EXPECT_EQ(file_request.local_positions.at(LocalColumnId(3)).value(), 0); + EXPECT_EQ(file_request.local_positions.at(LocalColumnId(0)).value(), 1); + EXPECT_EQ(file_request.local_positions.at(LocalColumnId(1)).value(), 2); + EXPECT_EQ(file_request.local_positions.at(LocalColumnId(2)).value(), 3); + const auto predicate_column_ids = projection_ids(file_request.predicate_columns); + const auto non_predicate_column_ids = projection_ids(file_request.non_predicate_columns); + for (const auto predicate_column_id : predicate_column_ids) { + EXPECT_TRUE(std::find(non_predicate_column_ids.begin(), non_predicate_column_ids.end(), + predicate_column_id) == non_predicate_column_ids.end()); + } +} + +TEST(TableReaderTest, CreateScanRequestPromotesProjectedColumnToPredicateColumn) { + const auto int_type = std::make_shared(); + const std::vector projected_columns = { + make_table_column(0, "id", int_type), + make_table_column(1, "score", int_type), + }; + const std::vector file_schema = { + make_file_column(0, "id", int_type), + make_file_column(1, "score", int_type), + }; + + TableColumnMapper mapper; + ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok()); + + TableFilter table_filter { + .conjunct = VExprContext::create_shared(table_int32_greater_than_expr(0, 0, 1)), + .global_indices = {GlobalIndex(0)}, + }; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request).ok()); + + EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector({0})); + EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector({1})); + ASSERT_EQ(file_request.local_positions.size(), 2); + EXPECT_EQ(file_request.local_positions.at(LocalColumnId(0)).value(), 1); + EXPECT_EQ(file_request.local_positions.at(LocalColumnId(1)).value(), 0); +} + +TEST(TableReaderTest, CreateScanRequestUsesColumnNameForByNamePredicateMapping) { + const auto int_type = std::make_shared(); + std::vector projected_columns = { + make_table_column(10, "id", int_type), + make_table_column(11, "score", int_type), + }; + const std::vector file_schema = { + make_file_column(0, "ID", int_type), + make_file_column(1, "score", int_type), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + set_name_identifiers(&projected_columns); + ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok()); + + TableFilter table_filter { + .conjunct = VExprContext::create_shared(table_int32_greater_than_expr(0, 0, 1)), + .global_indices = {GlobalIndex(0)}, + }; + + FileScanRequest file_request; + ASSERT_TRUE( + mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request).ok()); + + EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector({0})); + EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector({1})); + ASSERT_EQ(file_request.conjuncts.size(), 1); + const auto* localized_slot = + assert_cast(file_request.conjuncts[0]->root()->children()[0].get()); + EXPECT_EQ(localized_slot->slot_id(), 0); + EXPECT_EQ(localized_slot->column_id(), 1); +} + +TEST(TableReaderTest, ColumnPredicateFilterUsesColumnNameForByNameMapping) { + const auto int_type = std::make_shared(); + std::vector projected_columns = { + make_table_column(10, "id", int_type), + make_table_column(11, "score", int_type), + }; + const std::vector file_schema = { + make_file_column(0, "ID", int_type), + make_file_column(1, "score", int_type), + }; + + TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME}); + set_name_identifiers(&projected_columns); + ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok()); + + TableColumnPredicates column_predicates; + add_column_predicate( + &column_predicates, GlobalIndex(0), + create_comparison_predicate( + 10, "id", make_nullable(int_type), Field::create_field(2), false)); + + FileScanRequest file_request; + ASSERT_TRUE(mapper.create_scan_request({}, column_predicates, projected_columns, &file_request) + .ok()); + + ASSERT_EQ(file_request.column_predicate_filters.size(), 1); + EXPECT_EQ(file_request.column_predicate_filters[0].file_column_id.value(), 0); + EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector({0, 1})); + EXPECT_TRUE(file_request.predicate_columns.empty()); +} + +TEST(TableReaderTest, OpenReaderPushesMultiColumnConjunctToParquetReader) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_multi_conjunct_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {1, 5, 8}, {"one", "two", "three"}); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(2, "value", std::make_shared())); + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + projected_columns.push_back(make_table_column(1, "score", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE( + reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_sum_greater_than_expr(1, 1, 2, 2, 8))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + // The conjunct references both id and score, so ColumnMapper must put both file columns into + // predicate_columns and rewrite both slot refs to ParquetReader's file-local block positions. + // ParquetReader then evaluates the expression after all predicate columns have been read. + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& value_column = + assert_cast(expect_not_null_table_column(block, 0)); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 1)); + const auto& score_column = + assert_cast(expect_not_null_table_column(block, 2)); + ASSERT_EQ(id_column.size(), 1); + ASSERT_EQ(score_column.size(), 1); + ASSERT_EQ(value_column.size(), 1); + EXPECT_EQ(id_column.get_element(0), 3); + EXPECT_EQ(score_column.get_element(0), 8); + EXPECT_EQ(value_column.get_data_at(0).to_string(), "three"); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedColumnsFillDefaultForParquetSchemaMismatch) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_schema_mismatch_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + projected_columns.push_back( + make_table_column(99, "missing_value", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + // The table projection asks for field id 99, but the ParquetReader exposes only file-local + // fields 0 and 1. Missing columns are allowed by the current mapper options, so TableReader + // should still use the Parquet row count and fill a default column in table schema. + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + EXPECT_EQ(block.get_by_position(0).column->size(), 1); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, DefaultExprResultMatchesNullableTableType) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_nullable_default_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + const auto int_type = std::make_shared(); + auto missing_column = make_table_column(99, "c_new", make_nullable(int_type)); + missing_column.default_expr = VExprContext::create_shared( + VLiteral::create_shared(int_type, Field::create_field(42))); + std::vector projected_columns; + projected_columns.push_back(std::move(missing_column)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status.to_string(); + ASSERT_FALSE(eos); + + const auto& result = block.get_by_position(0); + ASSERT_TRUE(result.check_type_and_column_match().ok()); + EXPECT_TRUE(result.type->is_nullable()); + ASSERT_TRUE(result.column->is_nullable()); + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(nullable_column.size(), 1); + EXPECT_EQ(nullable_column.get_null_map_data()[0], 0); + const auto& values = assert_cast(nullable_column.get_nested_column()); + EXPECT_EQ(values.get_element(0), 42); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, DefaultExprAlignsNestedNullableArrayTableType) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_nested_nullable_array_default_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + const auto bigint_type = std::make_shared(); + const auto array_type = std::make_shared(make_nullable(bigint_type)); + const auto table_type = make_nullable(array_type); + auto missing_column = make_table_column(99, "single_element_groups", table_type); + missing_column.default_expr = VExprContext::create_shared( + std::make_shared(table_type)); + std::vector projected_columns; + projected_columns.push_back(std::move(missing_column)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status.to_string(); + ASSERT_FALSE(eos); + + const auto& result = block.get_by_position(0); + ASSERT_TRUE(result.check_type_and_column_match().ok()); + ASSERT_TRUE(result.column->is_nullable()); + const auto& nullable_column = assert_cast(*result.column); + ASSERT_EQ(nullable_column.size(), 1); + EXPECT_EQ(nullable_column.get_null_map_data()[0], 0); + + const auto& array_column = assert_cast(nullable_column.get_nested_column()); + ASSERT_EQ(array_column.size(), 1); + EXPECT_EQ(array_column.get_offsets()[0], 1); + ASSERT_TRUE(array_column.get_data().is_nullable()); + const auto& nested_nullable = assert_cast(array_column.get_data()); + ASSERT_EQ(nested_nullable.size(), 1); + EXPECT_EQ(nested_nullable.get_null_map_data()[0], 0); + const auto& values = assert_cast(nested_nullable.get_nested_column()); + EXPECT_EQ(values.get_element(0), 7); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedColumnsFillMissingParquetColumnWithDefault) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_schema_mismatch_reject_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + projected_columns.push_back( + make_table_column(99, "missing_value", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + const auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status.to_string(); + ASSERT_FALSE(eos); + + const auto& result = block.get_by_position(0); + ASSERT_TRUE(result.check_type_and_column_match().ok()); + // A missing scalar column without an explicit default is materialized as a default-value + // column. It may stay constant, so verify through the IColumn interface instead of assuming a + // concrete ColumnString instance. + ASSERT_EQ(result.column->size(), 1); + EXPECT_EQ(result.column->get_data_at(0).to_string(), ""); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedStructFillsMissingChildWithDefault) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_struct_missing_child_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_struct_parquet_file(file_path, 7); + + const auto int_type = std::make_shared(); + const auto string_type = std::make_shared(); + auto id_child = make_table_column(0, "id", int_type); + auto missing_child = make_table_column(99, "missing_child", string_type); + auto struct_type = std::make_shared(DataTypes {int_type, string_type}, + Strings {"id", "missing_child"}); + auto struct_column = make_table_column(100, "s", struct_type); + struct_column.children = {id_child, missing_child}; + std::vector projected_columns = {struct_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& struct_result = + assert_cast(expect_not_null_table_column(block, 0)); + ASSERT_EQ(struct_result.get_columns().size(), 2); + const auto& ids = assert_cast( + expect_not_null_nullable_nested_column(struct_result.get_column(0))); + ASSERT_EQ(struct_result.size(), 1); + EXPECT_EQ(ids.get_element(0), 7); + expect_nullable_column_all_null(struct_result.get_column(1)); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ReusedBlockClearsProjectedStructWithNullableChild) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_struct_nullable_child_reuse_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_struct_with_nullable_child_parquet_file(file_path); + + const auto int_type = std::make_shared(); + const auto string_type = std::make_shared(); + const auto nullable_string_type = make_nullable(string_type); + auto id_child = make_table_column(0, "id", int_type); + auto note_child = make_table_column(1, "note", nullable_string_type); + auto missing_child = make_table_column(99, "missing_child", string_type); + auto struct_type = std::make_shared( + DataTypes {int_type, nullable_string_type, string_type}, + Strings {"id", "note", "missing_child"}); + auto struct_column = make_table_column(100, "s", struct_type); + struct_column.children = {id_child, note_child, missing_child}; + std::vector projected_columns = {struct_column}; + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + ASSERT_EQ(block.rows(), 2); + const auto& struct_result = + assert_cast(expect_not_null_table_column(block, 0)); + const auto& notes = assert_cast(struct_result.get_column(1)); + EXPECT_FALSE(notes.is_null_at(0)); + EXPECT_TRUE(notes.is_null_at(1)); + + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); + EXPECT_EQ(block.rows(), 0); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedPartitionColumnUsesSplitPartitionValue) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_partition_value_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + auto partition_column = make_table_column(1, "value", std::make_shared()); + partition_column.is_partition_key = true; + projected_columns.push_back(std::move(partition_column)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.partition_values.emplace("value", Field::create_field("p1")); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + // The file has a physical column with the same id/name. The split partition value should still + // take precedence and be materialized by TableReader. + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto partition_value = block.get_by_position(0).column->convert_to_full_column_if_const(); + const auto& partition_value_data = assert_cast( + expect_not_null_nullable_nested_column(*partition_value)); + ASSERT_EQ(partition_value_data.size(), 1); + EXPECT_EQ(partition_value_data.get_data_at(0).to_string(), "p1"); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ConstantPartitionFilterSkipsSplitWhenFalse) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_constant_partition_filter_skip_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + auto partition_column = make_table_column(0, "part", std::make_shared()); + partition_column.is_partition_key = true; + projected_columns.push_back(std::move(partition_column)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 10))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.partition_values.emplace("part", Field::create_field(7)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); + EXPECT_EQ(block.get_by_position(0).column->size(), 0); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ConstantPartitionFilterKeepsSplitWhenTrue) { + const auto test_dir = std::filesystem::temp_directory_path() / + "doris_table_reader_constant_partition_filter_keep_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + auto partition_column = make_table_column(0, "part", std::make_shared()); + partition_column.is_partition_key = true; + projected_columns.push_back(std::move(partition_column)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, table_int32_greater_than_expr(0, 0, 1))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.partition_values.emplace("part", Field::create_field(7)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + expect_int32_column_values(*block.get_by_position(0).column, {7}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, RuntimeFilterOnConstantPartitionIsNotPreExecuted) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_constant_runtime_filter"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + auto partition_column = make_table_column(0, "part", std::make_shared()); + partition_column.is_partition_key = true; + projected_columns.push_back(std::move(partition_column)); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE( + reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {prepared_conjunct( + &state, runtime_filter_wrapper_expr( + table_int32_greater_than_expr(0, 0, 1)))}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + auto split_options = build_split_options(file_path); + split_options.partition_values.emplace("part", Field::create_field(7)); + ASSERT_TRUE(reader.prepare_split(split_options).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + const auto status = reader.get_block(&block, &eos); + ASSERT_TRUE(status.ok()) << status.to_string(); + ASSERT_FALSE(eos); + expect_int32_column_values(*block.get_by_position(0).column, {7}); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ParquetReaderReadsOnlyRowGroupsInFileRange) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_file_range_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, + {"range_group_one", "range_group_two", "range_group_three"}, 1); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + projected_columns.push_back(make_table_column(2, "value", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 1)).ok()); + + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + const auto& value_column = + assert_cast(expect_not_null_table_column(block, 1)); + ASSERT_EQ(block.rows(), 1); + EXPECT_EQ(id_column.get_element(0), 2); + EXPECT_EQ(value_column.get_data_at(0).to_string(), "range_group_two"); + + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + EXPECT_TRUE(eos); + EXPECT_EQ(block.rows(), 0); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedColumnsUseMapperExpressionForSameNameDifferentIdParquetSchema) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_same_name_diff_id_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 1, "one"); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(99, "id", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + // The table column has the same name as the Parquet field, but a different field id. + // ColumnMapper should still resolve it by name and build a SlotRef projection from the file + // column into the requested table column. + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + ASSERT_EQ(id_column.size(), 1); + EXPECT_EQ(id_column.get_element(0), 1); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +TEST(TableReaderTest, ProjectedColumnsUseMapperExpressionsForParquetSchemaMismatch) { + const auto test_dir = + std::filesystem::temp_directory_path() / "doris_table_reader_mapper_expr_test"; + std::filesystem::remove_all(test_dir); + std::filesystem::create_directories(test_dir); + + const auto file_path = (test_dir / "split.parquet").string(); + write_parquet_file(file_path, 7, "seven"); + + std::vector projected_columns; + projected_columns.push_back(make_table_column(0, "id", std::make_shared())); + projected_columns.push_back(make_table_column(1, "value", std::make_shared())); + + RuntimeState state {TQueryOptions(), TQueryGlobals()}; + set_name_identifiers(&projected_columns); + TableReader reader; + ASSERT_TRUE(reader.init({ + .projected_columns = projected_columns, + .column_predicates = {}, + .conjuncts = {}, + .format = FileFormat::PARQUET, + .scan_params = nullptr, + .io_ctx = nullptr, + .runtime_state = &state, + .scanner_profile = nullptr, + }) + .ok()); + + ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok()); + + // The table projection requests id as BIGINT instead of the file INT, so ColumnMapper should + // build a Cast expression. The second field has the same type and should build a SlotRef + // projection. Both columns should still materialize in table schema order. + Block block = build_table_block(projected_columns); + bool eos = false; + ASSERT_TRUE(reader.get_block(&block, &eos).ok()); + ASSERT_FALSE(eos); + + ASSERT_EQ(block.get_by_position(0).name, "id"); + ASSERT_EQ(block.get_by_position(1).name, "value"); + const auto& id_column = assert_cast(expect_not_null_table_column(block, 0)); + const auto& value_column = + assert_cast(expect_not_null_table_column(block, 1)); + ASSERT_EQ(id_column.size(), 1); + ASSERT_EQ(value_column.size(), 1); + EXPECT_EQ(id_column.get_element(0), 7); + EXPECT_EQ(value_column.get_data_at(0).to_string(), "seven"); + + ASSERT_TRUE(reader.close().ok()); + std::filesystem::remove_all(test_dir); +} + +} // namespace +} // namespace doris::format diff --git a/docs/doris-iceberg-parquet-api-design.md b/docs/doris-iceberg-parquet-api-design.md new file mode 100644 index 00000000000000..457550a932da67 --- /dev/null +++ b/docs/doris-iceberg-parquet-api-design.md @@ -0,0 +1,511 @@ +# Doris Iceberg + Parquet 新架构 API 设计 + +本文档用于描述 Doris 中 Iceberg + Parquet 新架构的 API 设计。本文档作为后续从 +`master` 新开重构分支时的起点,只定义 API 形状、职责边界、依赖方向和兼容原则, +不定义函数实现细节,不提供伪代码,不包含迁移 patch。 + +## 架构总览 + +目标架构包含 table 调度层、表格式语义层、schema 映射层、文件通用层和文件格式实现层: + +```text +FileScanner / split producer + -> +TableReader + -> +IcebergTableReader + -> +TableColumnMapper + FileReader + -> +ParquetReader +``` + +核心职责如下: + +- `TableReader` + 负责多文件、多 split 的上层调度,统一 scan 生命周期,对外输出 table block, + 并承接动态分区裁剪等 table-level 通用逻辑。 +- `IcebergTableReader` + 负责 Iceberg 表语义,包括 schema 绑定、scan task、delete file、虚拟列和 table + block finalize。 +- `TableColumnMapper` + 负责 table schema 到 file schema 的映射,负责 filter localization 和 schema + change 映射。 +- `FileReader` + 负责文件层通用读取接口,只理解 file-local schema 和 file-local scan request。 +- `ParquetReader` + 作为 `FileReader` 的 Parquet 实现,负责 Parquet 文件物理读取。 + +依赖方向必须保持单向: + +```text +TableReader + -> IcebergTableReader + -> TableColumnMapper + -> FileReader + -> ParquetReader +``` + +低层不反向理解高层语义,尤其 `ParquetReader` 不得反向理解 Iceberg/global schema。 + +## 核心 API 设计 + +### TableReader + +`TableReader` 是最上层读取接口,作为 `IcebergTableReader` 的基类,负责多 split / +多 file 调度,并承接 table-level 的通用裁剪逻辑,不下沉文件格式语义。 + +实际 API 文件: + +```text +be/src/format_v2/table_reader.h +``` + +实际命名空间: + +```cpp +namespace doris::format +``` + +建议职责: + +- 接收 split 列表或 scan task 列表; +- 控制当前 reader 的创建、切换和关闭; +- 管理 scan 生命周期; +- 承接动态分区裁剪等 table-level 通用过滤逻辑; +- 对外统一输出 table block。 +- `next` 是基类统一入口,内部负责 EOF 后切换 reader;具体表格式只提供打开和读取 + 当前 reader 的 hook。 + +建议接口形状: + +```cpp +namespace doris::format { + +class TableReader { +public: + virtual ~TableReader() = default; + + virtual Status init(const TableReadOptions& options); + virtual Status filter(const VExprContextSPtr& expr, bool* can_filter_all); + Status next(Block* table_block, size_t* rows, bool* eof); + virtual Status close(); + +protected: + Status next_reader(); + virtual Status open_next_reader(bool* has_reader); + virtual Status read_current(Block* table_block, size_t* rows, bool* eof); + virtual Status close_current_reader(); +}; + +} // namespace doris::format +``` + +接口约束: + +- `TableReader` 输出的是 table block,不输出 file-local block。 +- `TableReader` 负责多文件编排和 table-level 通用裁剪,不负责 schema mapping,不负责 + Parquet 物理解码。 +- `next_reader` 是 `TableReader` 自己的通用切换逻辑,不作为子类公开 override 接口。 +- 动态分区裁剪这类逻辑应下放到 `TableReader`,而不是散落在具体表格式 reader 中。 +- `TableReader` 不直接依赖旧 `vparquet` 表层语义。 + +### IcebergTableReader + +`IcebergTableReader` 是 Iceberg 表语义层,负责把单个 Iceberg data file 的读取组织成 +table 语义输出。 + +实际 API 文件: + +```text +be/src/format_v2/table/iceberg_reader.h +``` + +实际命名空间: + +```cpp +namespace doris::iceberg +``` + +建议职责: + +- 绑定 Iceberg 当前 table schema; +- 接收 `IcebergScanTask` 列表,并按 `TableReader` 的统一调度打开当前 task; +- 处理 position delete、equality delete、deletion vector; +- 物化 `_row_id`、`_last_updated_sequence_number` 等虚拟列; +- 将 `ParquetReader` 返回的 file-local block finalize 成 table block。 + +建议接口形状: + +```cpp +namespace doris::iceberg { + +class IcebergTableReader : public format::TableReader { +public: + virtual ~IcebergTableReader() = default; + + Status init(IcebergTableReadParams params); + Status close() override; + +protected: + Status open_next_reader(bool* has_reader) override; + Status read_current(Block* table_block, size_t* rows, bool* eof) override; + Status close_current_reader() override; +}; + +} // namespace doris::iceberg +``` + +接口约束: + +- `IcebergTableReader` 继承 `TableReader`,并通过组合使用 `FileReader`。 +- `IcebergTableReader` 不做 Parquet page/column 解码。 +- `IcebergTableReader` 负责 table-level finalize,不负责 file-local pruning 实现。 +- `IcebergTableReader` 的 schema、scan request、scan tasks 和底层 `FileReader` 应通过 + 一个初始化参数对象一次性传入;除非存在明确生命周期差异,不拆成 `bind` / + `init(TableScanRequest)` / `set_scan_tasks` 多阶段接口。 +- `IcebergTableReader` 不重新实现 reader 切换循环,只实现打开 Iceberg task、读取当前 + task 和关闭当前 reader 的 hook。 + +### TableColumnMapper + +`TableColumnMapper` 是 table schema 到 file schema 的通用映射层,不是 +Iceberg-only 组件。 + +实际 API 文件: + +```text +be/src/format_v2/table_reader.h +``` + +实际命名空间: + +```cpp +namespace doris::format +``` + +建议职责: + +- 输入 table schema、file schema、table scan request; +- 输出 `ColumnMapping` 和通用 `FileScanRequest`; +- 负责 filter localization; +- 负责 schema change 映射; +- 负责复杂列 child mapping; +- 负责缺失列、default、partition、generated 列的 finalize 语义描述。 + +建议接口形状: + +```cpp +namespace doris::format { + +class TableColumnMapper { +public: + explicit TableColumnMapper(TableColumnMapperOptions options = {}); + + virtual Status create_mapping(const std::vector& table_schema, + const std::vector& file_schema, + std::vector* mappings); + + virtual Status create_scan_request(const TableScanRequest& table_request, + const std::vector& mappings, + FileScanRequest* file_request); +}; + +} // namespace doris::format +``` + +接口约束: + +- `TableColumnMapper` 的输入是 table schema + file schema + table scan request。 +- `TableColumnMapper` 的输出是 `ColumnMapping` + `FileScanRequest`。 +- `TableColumnMapper` 必须是通用层,不做 Iceberg-only 命名。 +- Iceberg 场景默认按 field id 映射;按 name 映射不是本轮默认路径。 + +### FileReader + +`FileReader` 是文件物理读取层的通用接口,为后续 Parquet 之外的文件格式适配预留。 + +实际 API 文件: + +```text +be/src/format_v2/file_reader.h +``` + +实际命名空间: + +```cpp +namespace doris::format +``` + +建议职责: + +- 打开物理文件; +- 暴露 file-local schema; +- 接收 `FileScanRequest`; +- 输出 file-local block; +- 不理解 table/global schema。 + +建议接口形状: + +```cpp +namespace doris::format { + +class FileReader { +public: + virtual ~FileReader() = default; + + virtual Status open(io::FileReaderSPtr file, io::IOContext* io_ctx = nullptr); + virtual Status get_schema(std::vector* file_schema) const; + virtual Status init(const FileScanRequest& request); + virtual Status next(Block* file_block, size_t* rows, bool* eof); + virtual Status close(); +}; + +} // namespace doris::format +``` + +接口约束: + +- `FileReader` 输出的是 file-local block,不输出 table/global schema block。 +- `FileReader` 不处理 Iceberg schema evolution、default/generated/partition 列。 +- `IcebergTableReader` 组合 `FileReader`,不直接绑定具体文件格式 reader。 + +### ParquetReader + +`ParquetReader` 是 `FileReader` 的 Parquet 实现,只负责 Parquet file-local schema +和 Parquet file-local scan request。 + +实际 API 文件: + +```text +be/src/format/parquet/parquet_reader.h +``` + +实际命名空间: + +```cpp +namespace doris::parquet +``` + +建议职责: + +- 打开 Parquet 文件; +- 解析 footer 和 file schema; +- 接收 `ParquetScanRequest` 或通用 `FileScanRequest`; +- 执行 file-local projection 和 file-local filter; +- 输出 file-local block。 + +建议接口形状: + +```cpp +namespace doris::parquet { + +class ParquetReader : public format::FileReader { +public: + virtual ~ParquetReader() = default; + + virtual Status open(io::FileReaderSPtr file, io::IOContext* io_ctx = nullptr); + virtual Status get_schema(std::vector* file_schema) const; + virtual Status init(const ParquetScanRequest& request); + virtual Status next(Block* file_block, size_t* rows, bool* eof); + virtual Status close(); +}; + +} // namespace doris::parquet +``` + +接口约束: + +- `ParquetReader` 输出的是 file-local block,不输出 table/global schema block。 +- `ParquetReader` 不理解 Iceberg schema evolution。 +- `ParquetReader` 不负责 default/generated/partition 列。 +- 任何 table-level cast/default/generated/partition 语义都不能重新塞回 + `ParquetReader`。 + +## 关键类型 + +### SchemaField + +`SchemaField` 表示文件层 schema 中的列定义。 + +建议包含的信息: + +- file-local column id; +- 列名; +- 类型; +- child fields。 + +它服务于 `TableColumnMapper` 做 schema matching,不携带 table-level 语义。 + +### TableColumnDefinition + +`TableColumnDefinition` 表示 table/global schema 中的列定义。 + +建议包含的信息: + +- table column id; +- 列名; +- 类型; +- child columns。 + +Iceberg 场景下,column id 默认对应 field id。 + +### TableFilter + +`TableFilter` 表示 table 层过滤条件。 + +建议包含的信息: + +- `table_column_id` +- `conjunct` +- `predicates` + +职责约束: + +- `conjunct` 偏表达式过滤,适合表达 cast、复杂表达式、复杂列提取等语义; +- `predicates` 偏结构化单列下推,适合驱动 row group stats、page index、dictionary、 + bloom filter 等文件层优化。 + +### FileLocalFilter + +`FileLocalFilter` 表示已经 localize 到 file-local schema 的过滤条件。 + +建议包含的信息: + +- `file_column_id` +- `conjunct` +- `predicates` + +职责约束: + +- `conjunct` 用于 file-local 表达式过滤; +- `predicates` 用于 file-local 结构化下推; +- 其输入必须来自 `TableColumnMapper`,不能由具体文件 reader 自己推导 table 语义。 + +### ColumnMapping + +`ColumnMapping` 是 table schema 与 file schema 之间的核心边界对象。 + +建议包含的信息: + +- `table_column_id` +- `file_column_id` +- `file_type` +- `table_type` +- `finalize_expr` +- `reader_filter_expr` +- `child_mappings` + +职责约束: + +- `finalize_expr` 服务最终输出,把 file-local value 转成 table/global value; +- `reader_filter_expr` 服务读时 filter fallback; +- 二者语义不同,不能混用; +- `child_mappings` 用于复杂列 remap、复杂列裁剪和复杂列 schema change。 + +### TableScanRequest + +`TableScanRequest` 描述 table 层 scan 请求。 + +建议包含的信息: + +- projected table columns; +- table filters。 + +它由 `IcebergTableReader` 接收,再交给 `TableColumnMapper` 生成 file-local request。 + +### ParquetScanRequest + +`ParquetScanRequest` 继承 `FileScanRequest`,描述 Parquet file-local scan 请求。 + +### FileScanRequest + +`FileScanRequest` 描述通用 file-local scan 请求。 + +建议包含的信息: + +- projected file columns; +- local filters; +- reader expression map。 + +它是 `FileReader` 的唯一 scan 输入,不包含 table/global schema 语义。 + +### IcebergScanTask + +`IcebergScanTask` 表示一次 Iceberg data file 读取任务。 + +建议包含的信息: + +- data file 信息; +- position delete 文件; +- equality delete 文件; +- deletion vector 信息。 + +它是 `IcebergTableReader` 的输入,不应直接传给 `ParquetReader`。 + +### IcebergTableReadParams + +`IcebergTableReadParams` 表示一次 Iceberg table scan 的完整初始化输入。 + +建议包含的信息: + +- Iceberg read options; +- Iceberg table schema; +- table scan request; +- Iceberg scan task 列表; +- 底层 `FileReader`。 + +它用于避免 `IcebergTableReader` 暴露多个半初始化阶段。调用方应一次性构造完整 +参数并调用 `init`。 + +## 设计原则 + +### 边界原则 + +- `FileReader` 不理解 global schema,不直接处理 Iceberg schema evolution。 +- `ParquetReader` 是 `FileReader` 的 Parquet 实现。 +- `TableColumnMapper` 是 schema mapping 和 filter localization 的唯一入口。 +- `IcebergTableReader` 不做 Parquet 解码,只负责 table-level finalize、delete、 + virtual columns。 +- `TableReader` 只负责多文件编排和 table-level 通用裁剪,不下沉文件格式语义。 +- 任何 table-level cast/default/generated/partition 语义都不能重新塞回 + `ParquetReader`。 + +### 依赖原则 + +- 低层不能反向依赖高层语义。 +- `FileReader` 只依赖 file-local request。 +- `IcebergTableReader` 继承 `TableReader`,复用其多文件编排和通用裁剪能力。 +- `IcebergTableReader` 通过组合使用 `FileReader`。 +- `TableColumnMapper` 可以被 Iceberg 之外的其他表格式复用。 + +### 命名原则 + +- 表层抽象使用 `TableReader`、`IcebergTableReader`、`TableColumnMapper`、 + `FileReader`、`ParquetReader` 命名。 +- `TableColumnMapper` 不使用 Iceberg-only 命名。 +- file schema 类型使用 `SchemaField`,table schema 类型使用 `TableColumnDefinition`。 + +## 兼容原则 + +新架构重构期间,新旧代码允许并存,但必须遵守以下约束: + +- 旧 `vparquet` / Hive / Hudi / Paimon 路径在新架构稳定前允许保留。 +- 新架构实现不得继续向旧 `vparquet` 表层语义回灌依赖。 +- 先搭新框架 API,再逐步迁移调用点。 +- 不允许边改 API 边混入临时裸逻辑、实验性草稿或未收敛命名。 +- 兼容层可能需要存在,但本文档不定义兼容层的具体实现方案。 + +## 验收标准 + +该文档应满足以下目标: + +- 不引用错误实验代码作为既成事实; +- 不出现实现性草稿、裸伪代码、未收敛命名混用; +- 让另一个工程师从 `master` 新开分支时,可以直接按本文档搭 API 骨架; +- 读完文档后,不需要再讨论以下问题: + - 新架构分几层; + - 每层负责什么; + - 哪层理解 global schema; + - 哪层做 schema change / filter localization / finalize; + - 哪层允许依赖旧实现,哪层不允许。 diff --git a/docs/new-parquet-reader-column-index-refactor.md b/docs/new-parquet-reader-column-index-refactor.md new file mode 100644 index 00000000000000..56f8c7ca4a37d5 --- /dev/null +++ b/docs/new-parquet-reader-column-index-refactor.md @@ -0,0 +1,404 @@ +# New Reader 列标识实现说明 + +本文说明 Doris new table/file reader 栈中各种列标识的当前含义,以及它们在 +`FileScannerV2`、`TableReader`、`TableColumnMapper` 和 new Parquet reader 中的流转逻辑。 + +核心原则是把 **schema identity** 和 **执行期位置** 分开: + +- schema identity 用来判断 table column 和 file column 是否是同一列。 +- index/position 用来表示 block、projection tree、scan request 或 constant map 中的位置。 +- FE column unique id 只在 scanner 边界用于定位 slot,进入 table/file reader 后不再出现。 + +共享定义集中在 `be/src/format_v2/column_data.h`。file reader 通用请求定义在 +`be/src/format_v2/file_reader.h`。new Parquet reader 自己的 Parquet 内部 schema tree 定义在 +`be/src/format_v2/parquet/parquet_column_schema.h`。 + +## 层级边界 + +当前 reader 栈可以按语义分成三层。 + +### FileScannerV2:FE 标识到 reader 标识的边界 + +`FileScannerV2` 仍能看到 FE 下发的 `slot_id`、`col_unique_id`、`TFileScanSlotInfo` 和 +`TColumnAccessPath`。这些 FE 侧标识只在这里使用。 + +`FileScannerV2::_build_projected_columns()` 会把 `_params->required_slots` 转成 +`std::vector`: + +- vector 下标就是 `GlobalIndex`。 +- `_slot_id_to_global_index` 把 FE `slot_id` 转成 `GlobalIndex`,用于 row-level conjunct。 +- `_column_unique_id_to_global_index` 把 FE `col_unique_id` 转成 `GlobalIndex`,用于 column predicate。 +- `ColumnDefinition::identifier` 表示 table-side schema identity,默认是列名;如果外部 schema + 提供 field id,则改用 field id。 +- partition/default/generated 信息被挂到 `ColumnDefinition` 上,由 table reader 层处理。 + +从这一层往下,table/file reader 不再使用 FE column unique id。 + +### TableReader / TableColumnMapper:table schema 到 file schema + +`TableReader::open_reader()` 对每个 split 打开一个具体 `FileReader`,先通过 +`FileReader::get_schema()` 获取当前文件的 file-local schema,再用 `TableColumnMapper` 建立映射。 + +`TableColumnMapper` 的输入是: + +- table/global schema:`FileScannerV2` 构造的 `projected_columns`。 +- file-local schema:具体 file reader 返回的 `std::vector`。 +- per-split partition values。 +- table-level row filters 和 column predicates。 + +`TableColumnMapper` 的输出是: + +- `ColumnMapping`:构造阶段使用的 table column 到 file/constant/virtual source 的映射。 +- `FileScanRequest`:只含 file-local projection、file-local block layout 和 file-local filters。 +- `ColumnMapResult` / `ResultColumnMapping`:给 table reader finalize 阶段消费的最终映射。 +- `FilterEntry`:给 filter localization 使用的 `GlobalIndex -> LOCAL/CONSTANT/UNSET` target。 +- `ConstantMap`:partition/default/generated 常量列。 + +### FileReader / ParquetReader:只理解 file-local 请求 + +`FileReader` 只暴露两类 schema/request: + +- `get_schema(std::vector*)`:返回文件自身 schema。 +- `open(std::unique_ptr&)`:接收已经 localize 后的 file-local scan request。 + +具体 file reader 不理解 table/global schema、Iceberg default、partition column、FE slot id 或 +FE column unique id。 + +new Parquet reader 使用 `FileScanRequest` 中的 `LocalColumnIndex` 创建 column reader,并使用 +`local_positions` 决定 file-local block layout。 + +## ColumnDefinition + +定义位置:`be/src/format_v2/column_data.h` + +`ColumnDefinition` 是 table/global schema 和 file-local schema 共用的列定义。它表示列名、类型、 +nested children、默认表达式、partition 属性和 file-local column kind。 + +关键字段: + +- `identifier`:schema identity。用于 table column 和 file column 匹配。 +- `local_id`:file reader 返回的 schema node 在当前 parent 下的 reader-local id。 +- `name`:逻辑列名。BY_NAME 且没有显式 string identifier 时会回退到它。 +- `type`:当前 schema node 的 Doris 类型。 +- `children`:nested children。table/global schema 中是 table children;file schema 中是 + file-local children。 +- `default_expr`:missing/default/generated column 的物化表达式。 +- `is_partition_key`:partition column 标记。 +- `column_type`:file-local column kind,例如普通数据列或 row number virtual column。 + +`ColumnDefinition` 不保存 FE column unique id。它也不保存“应该按什么方式匹配”。匹配方式由 +`TableColumnMapperOptions::mode` 统一决定。 + +### identifier + +`identifier` 是一个 `Field`,语义接近 DuckDB `MultiFileColumnDefinition::identifier`: + +- `TYPE_NULL`:没有显式 identifier。BY_NAME 时使用 `name`。 +- `TYPE_INT`:在 BY_FIELD_ID 中表示 field id;在 BY_INDEX 中表示 file schema position。 +- `TYPE_STRING`:显式 name identifier。 + +访问 helper: + +- `has_identifier_field_id()` / `get_identifier_field_id()`:BY_FIELD_ID 使用。 +- `get_identifier_name()`:BY_NAME 使用;没有显式 string identifier 时返回 `name`。 +- `get_identifier_position()`:BY_INDEX 使用。 +- `file_local_id()`:file reader projection 使用;优先返回 `local_id`,否则回退到 int + identifier。这个回退只用于兼容某些 file schema 构造路径,不应重新引入 FE id 语义。 + +## 强类型位置 + +### GlobalIndex + +定义位置:`be/src/format_v2/column_data.h` + +`GlobalIndex` 表示 table/global output block 中的 top-level 列位置。当前等于 +`_params->required_slots` 的下标。 + +主要使用位置: + +- `ColumnMapping::global_index` +- `TableFilter::global_indices` +- `TableColumnPredicates` 的 key +- `ColumnMapResult` / `ResultColumnMapping` 的 key +- `FilterEntry` map 的 key + +`GlobalIndex` 不是 FE slot id,也不是 FE column unique id。 + +### LocalColumnId + +定义位置:`be/src/format_v2/column_data.h` + +`LocalColumnId` 表示当前物理文件 schema 的 top-level reader-local column id。 + +主要使用位置: + +- `FileScanRequest::local_positions` 的 key。 +- `LocalColumnIndex::top_level()`。 +- new Parquet reader 创建 top-level column reader。 +- page index、statistics、bloom filter 等 file-local pruning 的 root column key。 +- row position 这类 reader 内部 virtual column id。 + +`LocalColumnId` 不是 file-local block position。一个 top-level file column 在本次 scan request +输出 block 中的位置由 `LocalIndex` 表示。 + +### LocalIndex + +定义位置:`be/src/format_v2/column_data.h` + +`LocalIndex` 表示一次 `FileScanRequest` 内 file-local block 的列位置。 + +主要使用位置: + +- `FileScanRequest::local_positions` 的 value。 +- file-local rewritten `SlotRef` 的 input position。 +- `TableReader` 从 file block 取列。 +- `ParquetScanScheduler` 把 column reader 读出的数据写入 file block。 + +`LocalIndex` 是 request-local block layout,不是 file schema ordinal。 + +### ConstantIndex + +定义位置:`be/src/format_v2/column_data.h` + +`ConstantIndex` 表示 `ConstantMap` 中的 entry 位置。它用于 per-split/per-file 常量列: + +- partition column。 +- schema evolution default column。 +- generated/default expression column。 +- 将来可扩展到更多 virtual/constant source。 + +`FilterEntry` 可以指向 `ConstantIndex`。当一个 row-level conjunct 只引用 constant target 时, +`TableReader` 会在打开 file reader 前用 1 行常量 block 求值;如果结果为 false/NULL,当前 split +直接跳过。 + +### LocalColumnIndex + +定义位置:`be/src/format_v2/column_data.h` + +`LocalColumnIndex` 表示递归 file-local projection path: + +```cpp +struct LocalColumnIndex { + int32_t index = -1; + bool project_all_children = true; + std::vector children; +}; +``` + +语义: + +- root entry 的 `index` 是 `LocalColumnId`。 +- nested entry 的 `index` 是当前 parent 下的 file-local child id。 +- `project_all_children = true` 表示读取整个 subtree。 +- `project_all_children = false` 表示只读取 `children` 中列出的 child paths。 + +通用 helper: + +- `is_full_projection()` +- `is_partial_projection()` +- `find_child_projection()` +- `is_child_projected()` +- `merge_local_column_index()` + +new Parquet reader 的 STRUCT/LIST/MAP reader 都消费这套 projection helper: + +- STRUCT:只创建被投影 child 的 reader。 +- LIST:把 element projection 递归传给 element reader。 +- MAP:总是读取 key,把 value projection 递归传给 value reader。 + +## FileScanRequest + +定义位置:`be/src/format_v2/file_reader.h` + +`FileScanRequest` 是 table reader 交给 file reader 的唯一 scan 输入。它不包含 table/global schema。 + +关键字段: + +- `predicate_columns`:row-level conjunct/delete conjunct 需要先读取的 file-local projection。 +- `non_predicate_columns`:最终输出需要读取、且不需要先参与 row-level filter 的 file-local + projection。 +- `local_positions`:`LocalColumnId -> LocalIndex`,决定 file-local block layout。 +- `conjuncts` / `delete_conjuncts`:已经把 table/global slot 改写成 file-local slot 的表达式。 +- `column_predicate_filters`:file-layer pruning hints,只用于 min/max、page index、dictionary、 + bloom filter 等剪枝,不参与 batch row filtering。 + +`predicate_columns` 和 `non_predicate_columns` 都按 file-local schema 表达。file reader 只需要根据 +这两个列表创建 reader,并按 `local_positions` 写入 file block。 + +## TableColumnMapper 逻辑 + +定义位置: + +- `be/src/format_v2/column_mapper.h` +- `be/src/format_v2/column_mapper.cpp` + +### 匹配模式 + +`TableColumnMapperOptions::mode` 决定 `identifier` 的解释方式: + +- `BY_FIELD_ID`:`TYPE_INT` identifier 是 field id。 +- `BY_NAME`:`TYPE_STRING` identifier 或 `name` 是匹配名。 +- `BY_INDEX`:`TYPE_INT` identifier 是 file schema position。 + +`TableReader::open_reader()` 当前默认按 field id 映射;如果 file schema 首列没有 int identifier, +会 fallback 到 BY_NAME。Hive reader 可覆盖默认模式,Hive1 ORC 这类场景可使用 BY_INDEX。 + +### create_mapping() + +`create_mapping()` 为每个 `GlobalIndex` 生成一个 `ColumnMapping`: + +1. partition column 优先映射到 `ConstantMap`。 +2. BY_INDEX 时按 file position 取 file schema。 +3. 普通列通过 matcher 在 file schema 中找对应 file field。 +4. 缺失但带 default expr 的列映射到 `ConstantMap`。 +5. 特殊 virtual column 记录 virtual column type。 +6. 允许 missing column 时保留空 mapping,由 table finalize 阶段补 NULL/default。 + +`ColumnMapping::file_local_id` 是 table column 绑定到 file schema 后的 reader-local id: + +- root mapping 中可转成 `LocalColumnId`。 +- nested mapping 中表示 parent 下的 child id。 +- constant/missing/virtual mapping 没有 `file_local_id`。 + +schema identity field id 不保存在 `ColumnMapping` 中,只保存在 +`ColumnDefinition::identifier` 中,并由 mapper 的匹配模式解释。 + +### create_scan_request() + +`create_scan_request()` 把 table-level scan 信息转换成 file-local request: + +1. 先把不参与 row-level filter 的输出列加入 `non_predicate_columns`。 +2. 调用 `localize_filters()`,把 row-level conjunct 和 column predicates 定位到 file-local source。 +3. 为所有已读取 file column 重建 output projection,让 `ColumnMapping::projection` 指向正确的 + `LocalIndex`。 +4. 生成 `ColumnMapResult` 和 `ResultColumnMapping`,供 table reader finalize。 + +`local_positions` 在这个阶段确定。同一个 file column 如果同时被 filter 和 output 使用,只会有 +一个 `LocalIndex`。 + +### FilterEntry + +`FilterEntry` 是 `GlobalIndex` 到 filter target 的结果: + +- `LOCAL`:filter 可以在 file-local block 上求值,target 是 `LocalIndex`。 +- `CONSTANT`:filter 只依赖 `ConstantMap` entry。 +- `UNSET`:当前 split 无法下推到 file reader。 + +`TableColumnMapper::_build_filter_entries()` 在 `FileScanRequest::local_positions` 确定后生成 +`FilterEntry`。表达式改写时只把 `LOCAL` target 改写成 file-local slot;`CONSTANT` target 用于 +split-level constant filter evaluation。 + +### ColumnMapResult / ResultColumnMapping + +`ColumnMapResult` 记录一个 global result column 的递归映射结果: + +- `local_column_id`:root file column。 +- `column_index`:file-local projection tree。 +- `mapping`:root 指向 `LocalIndex`,nested child 通过 `IndexMapping::child_mapping` 递归映射。 + +`ResultColumnMapping` 是最终可消费的 `GlobalIndex -> ColumnMapEntry` map。`ColumnMapEntry` 包含: + +- `IndexMapping mapping` +- `local_type` +- `global_type` +- `filter_conversion` + +TableReader finalize 阶段用它把 file-local block 转成 table/global block。 + +### nested child mapping + +复杂列映射时,`IndexMapping::child_mapping` 的 key 是 table/global child ordinal,value 是对应 +file-local child mapping。这样 filter 中的 `STRUCT_EXTRACT` 可以按 table child ordinal 找到 +file child ordinal。 + +Doris 不再维护额外的 `NestedPredicateTargetInfo` / filter target path。nested filter localization +直接沿 `IndexMapping::child_mapping` 转换 selector path。 + +对于 `SELECT s.name WHERE s.id > 5` 这类 filter-only child: + +- `s.name` 进入 output projection。 +- `s.id` 会进入 predicate projection。 +- `original_file_children` 保留 projection 前的 file children,用于定位 filter-only child。 +- `child_mappings` 只描述输出 shape,避免 filter-only child 改变最终 STRUCT/LIST/MAP shape。 + +## Parquet 内部 schema 标识 + +定义位置:`be/src/format_v2/parquet/parquet_column_schema.h` + +`ParquetColumnSchema` 是 new Parquet reader 内部 schema tree。它描述 Parquet 逻辑字段和 primitive +leaf column 的关系,不暴露给 table reader。对外统一通过 `ParquetReader::get_schema()` 返回 +`std::vector`。 + +关键字段: + +- `local_id`:当前 parent 下的 reader-local id。top-level 是 root field ordinal,nested 是 child + ordinal。`LocalColumnIndex` 传给 `ParquetColumnReaderFactory` 的就是这个 id。 +- `parquet_field_id`:Parquet schema element 中可选的 field_id。Arrow 在不存在 field_id 时返回 + `-1`。它只作为 schema matching identifier,不用于读取 column chunk。 +- `name`:Parquet schema name。 +- `type`:转换后的 Doris 类型。 +- `leaf_column_id`:Parquet primitive leaf column ordinal。用于访问 `ColumnDescriptor`、 + row group column chunk、statistics、page index、bloom filter 等。复杂节点为 `-1`。 +- `type_descriptor`:primitive leaf 的 Parquet physical/logical type 信息。 +- `descriptor`:primitive leaf 的 Arrow Parquet `ColumnDescriptor`。 +- `max_definition_level` / `max_repetition_level`:该 node 下的最大 Dremel level。 +- `nullable_definition_level`:当前 node 自身为 NULL 时对应的 definition level。 +- `repeated_repetition_level`:当前或最近 repeated container 的 repetition level。 + +`ParquetReader::get_schema()` 会把 `ParquetColumnSchema` 转成 `ColumnDefinition`: + +- 如果 `parquet_field_id >= 0`,`ColumnDefinition::identifier` 是 `TYPE_INT` field id。 +- 否则 `identifier` 是 `TYPE_STRING` name。 +- `ColumnDefinition::local_id` 是 `ParquetColumnSchema::local_id`。 +- children 递归转换。 + +因此 table reader 可以按 field id 或 name 匹配,而 Parquet reader 自己仍只按 `local_id`、 +`leaf_column_id` 和 Dremel levels 读取数据。 + +## 端到端流转 + +一次 split 的列标识流转如下: + +1. `FileScannerV2::_build_projected_columns()`: + FE `slot_id` / `col_unique_id` 被翻译成 `GlobalIndex`,并生成 table-side + `ColumnDefinition`。 +2. `ParquetReader::init()`: + 解析 Arrow Parquet schema,构造内部 `ParquetColumnSchema`。 +3. `ParquetReader::get_schema()`: + 把 Parquet 内部 schema 暴露成 file-side `ColumnDefinition`。 +4. `TableReader::open_reader()`: + 根据 file schema 是否带 int identifier 选择 BY_FIELD_ID 或 BY_NAME,并调用 mapper。 +5. `TableColumnMapper::create_mapping()`: + 用 `ColumnDefinition::identifier` 匹配 table/global schema 和 file-local schema,生成 + `ColumnMapping`。 +6. `TableColumnMapper::create_scan_request()`: + 生成 `FileScanRequest`,其中所有 projection 和 block position 都是 file-local 的。 +7. `ParquetReader::open()`: + 校验 `LocalColumnId`,用 `LocalColumnIndex` 创建 column readers,并规划 row group pruning。 +8. `ParquetScanScheduler`: + 按 `local_positions` 把 predicate/non-predicate column 写入 file-local block。 +9. `TableReader` finalize: + 使用 `ResultColumnMapping`、`ConstantMap` 和 projection expression,把 file-local block 转成 + table/global output block。 + +## 使用约定 + +修改 new reader 代码时应遵守以下约定: + +- 不要在 table/file reader 层重新传递 FE column unique id。 +- 不要把 `ColumnDefinition::identifier` 当作 file reader 读取 id。 +- 不要把 `LocalColumnId` 当作 block position;block position 使用 `LocalIndex`。 +- 不要把 `LocalIndex` 当作 schema ordinal。 +- `LocalColumnIndex::index` 在 root 和 child 层含义不同,调用方必须知道当前 projection node + 所在层级。 +- file reader 只能消费 `FileScanRequest`,不能理解 partition/default/generated/table schema。 +- column predicate pruning 是 file-layer hint,不等价于 row-level filter。 +- constant filter 可以在 table reader 层提前求值,但不应下推到 file reader。 + +## 已知限制 + +TVF 查询 Parquet 且文件没有 field id 时,top-level BY_NAME 已经可以通过 name identifier 工作。 +但 nested access path 的 fallback 目前仍有一处 TODO:STRUCT child fallback 使用 struct ordinal +构造 int identifier。对于没有 field id 的 nested Parquet schema,BY_NAME 场景应保留 string +identifier,让 `TableColumnMapper` 从 Parquet file schema 中按 name 解析 file-local child id。 +该问题已在 `be/src/exec/scan/file_scanner_v2.cpp` 代码中记录,当前未修复。 diff --git a/docs/new-parquet-reader-ut-improvement-plan.md b/docs/new-parquet-reader-ut-improvement-plan.md new file mode 100644 index 00000000000000..4ece111d0d6323 --- /dev/null +++ b/docs/new-parquet-reader-ut-improvement-plan.md @@ -0,0 +1,325 @@ +# New Parquet Reader UT Improvement Plan + +本文档评估 Doris new parquet reader 当前 UT 覆盖方式,并给出更合理的测试分层、数据构造方法和落地优先级。 + +目标不是追求形式上的 100% 行覆盖率,而是让测试能够发现 new parquet reader 最容易出错的真实问题:schema 兼容、definition/repetition level 物化、投影/过滤交互、row group/page pruning、delete predicate 以及 schema evolution 组合。 + +## 当前覆盖方式评估 + +当前测试分层大体合理: + +| 层级 | 代表文件 | 当前价值 | +|---|---|---| +| Schema resolver UT | `be/test/format_v2/parquet/parquet_schema_test.cpp` | 直接构造 Parquet schema node,验证 `ParquetColumnSchema` 的 kind、type、level 和非法 schema 拒绝。速度快,适合覆盖 schema 分支。 | +| Type resolver UT | `be/test/format_v2/parquet/parquet_type_test.cpp` | 覆盖 physical/logical/converted type 到 Doris type 的映射。 | +| Leaf value UT | `be/test/format_v2/parquet/parquet_leaf_reader_test.cpp` | 覆盖 nullable spacing、binary/fixed/bool/float16 等 leaf append 细节。 | +| Column reader UT | `be/test/format_v2/parquet/parquet_column_reader_test.cpp` | 用 Arrow writer 生成真实 parquet 文件,覆盖 scalar/struct/list/map 的 read、skip、select、overflow。 | +| File reader UT | `be/test/format_v2/parquet/parquet_reader_test.cpp` | 覆盖 open/read、多 row group、predicate selection、statistics/dictionary/page index pruning、row position、delete predicate。 | +| Table reader UT | `be/test/format_v2/table_reader_test.cpp` | 覆盖 table schema 到 file schema mapping、aggregate pushdown、default value、Iceberg delete/virtual column 等跨层行为。 | + +这个方向是正确的,但目前有三个明显缺口: + +1. Schema 兼容测试和真实读取测试之间缺少桥接。`parquet_schema_test.cpp` 可以证明 legacy LIST/MAP schema 被解析成期望的 tree,但不能证明 `ListColumnReader`、`MapColumnReader` 可以正确消费对应 def/rep levels。 +2. 真实 parquet 文件主要由 Arrow writer 生成。Arrow 生成的文件通常符合标准 layout,不能充分代表 Hive、Spark、old parquet-mr、旧 Doris 或其它 legacy writer 的 schema 形态。 +3. 异常路径和组合路径覆盖不足。比如 optional map key 被 schema 接受后,真实数据中 key 为 null 必须在 materialize 阶段报错;key/value stream 不对齐、invalid repeated level、non-nullable complex column 读到 null 等 corruption 路径需要专门测试。 + +## 改进原则 + +1. 按风险分层测试,不用单一大 fixture 覆盖所有逻辑。 +2. Schema resolver 只验证 schema 归一化,不承担真实读取正确性的证明。 +3. Def/rep level materialization 要有直接单测,避免所有边界都依赖真实 parquet 文件构造。 +4. 对 legacy layout 使用 golden parquet corpus,而不是只用 Arrow writer 动态生成。 +5. Reader 集成测试覆盖跨模块行为,避免在 SQL regression 中验证过多 BE 内部细节。 +6. SQL regression 只保留用户可见和跨层最关键路径,避免回归测试过慢。 + +## 推荐测试分层 + +### L0: Schema Resolver Table-Driven UT + +位置:`be/test/format_v2/parquet/parquet_schema_test.cpp` + +职责:覆盖 `parquet_column_schema.cpp` 的 schema 归一化规则。建议把 LIST/MAP case 整理成 table-driven 形式,每个 case 明确: + +- 输入 schema layout +- 是否成功 +- top-level kind/type/nullability +- child kind/name/type/nullability +- definition/repetition level +- error message 关键字 + +必须覆盖的 schema 形态: + +| 类别 | Case | +|---|---| +| LIST 标准格式 | Standard 3-level list: `optional group a (LIST) { repeated group list { optional int32 element; } }` | +| LIST legacy | repeated primitive, repeated group named `array`, repeated group named `_tuple`, repeated group with multiple children | +| LIST wrapper 判定 | repeated group with logical annotation, repeated group whose only child is repeated, repeated group whose only child is optional scalar | +| Bare repeated | repeated primitive field, repeated group field inside struct | +| MAP 标准格式 | required/optional outer map, required/optional value | +| MAP 兼容格式 | optional key accepted at schema level, `MAP_KEY_VALUE` converted annotation | +| Invalid schema | LIST outer has zero/multiple children, non-repeated LIST child, MAP outer has zero/multiple children, primitive MAP entry, non-repeated MAP entry, entry child count not equal to 2, repeated outer LIST/MAP in normal mode | +| Unsupported type | UTC TIME rejection, unsupported physical/logical type | + +L0 的验收标准:schema branch 新增或修改时,必须有对应 table-driven case;但 L0 通过不代表 reader 行为充分。 + +### L1: Def/Rep Level Materializer UT + +位置建议: + +- `be/test/format_v2/parquet/parquet_nested_materializer_test.cpp` +- 或拆分为 `parquet_list_column_reader_test.cpp`、`parquet_map_column_reader_test.cpp` + +职责:用 fake child reader 直接喂 definition levels、repetition levels 和 leaf values,验证 `ListColumnReader` / `MapColumnReader` 的 offsets、nullmap、child values、cursor 和错误路径。 + +这种方式比构造真实 parquet 文件更适合覆盖边界,因为 def/rep level 是复杂类型 reader 的核心输入。 + +建议增加测试工具: + +```cpp +class FakeNestedColumnReader final : public ParquetColumnReader { +public: + Status load_nested_batch(int64_t rows) override; + Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column, + int64_t* values_read) override; + const std::vector& nested_definition_levels() const override; + const std::vector& nested_repetition_levels() const override; + int64_t nested_levels_written() const override; +}; +``` + +必须覆盖的 materialize case: + +| 类别 | Case | +|---|---| +| LIST 正常路径 | null list, empty list, list with values, list with null element, consecutive repeated elements | +| LIST 操作 | read 分批、skip 后 read、select 非连续行、select 跨 overflow 边界 | +| LIST 异常 | first level has `rep_level == list.repetition_level`, non-nullable LIST 读到 null, child value count 不匹配 | +| MAP 正常路径 | null map, empty map, one entry, multiple entries, nullable value, complex value | +| MAP 操作 | read 分批、skip 后 read、select 非连续行、value scalar path 和 complex value path | +| MAP 异常 | null key, value stream ended before key stream, key/value repetition level 不对齐, key count 不匹配, value count 不匹配, non-nullable MAP 读到 null | + +L1 的验收标准:`ListColumnReader::build_nested_column()` 和 `MapColumnReader::build_nested_column()` 的主要分支必须有直接 UT;corruption path 不能只靠真实文件偶然触发。 + +### L2: Golden Parquet Corpus UT + +位置建议: + +- 数据文件:`be/test/exec/test_data/parquet_v2_compat/` +- 测试文件:`be/test/format_v2/parquet/parquet_compat_corpus_test.cpp` + +职责:保存小型真实 parquet 文件,覆盖非 Arrow 标准 writer 或难以用 Arrow writer 生成的 legacy layout。每个文件控制在几十行以内,配套记录 schema 来源和 expected output。 + +建议文件来源: + +| 来源 | 覆盖目标 | +|---|---| +| Arrow writer | 标准 LIST/MAP、page v2、dictionary/plain、不同 row group/page size | +| Spark | Spark nested list/map schema、nullable struct/list/map 混合 | +| Hive/parquet-mr | legacy two-level list、optional map key、`array` / `bag` / `key_value` 等命名兼容 | +| 手工生成 | malformed-but-parseable def/rep level edge case,或特殊 converted annotation | + +Golden 文件命名建议: + +```text +be/test/exec/test_data/parquet_v2_compat/ + list_two_level_repeated_primitive.parquet + list_tuple_struct_element.parquet + list_repeated_group_with_logical_map_element.parquet + map_optional_key_no_null.parquet + map_optional_key_with_null.parquet + map_value_list_nullable.parquet + nested_list_struct_map_list.parquet + README.md +``` + +每个 corpus case 至少验证: + +- `get_schema()` 输出是否符合预期 +- full read 输出是否符合预期 +- projection read 输出是否符合预期 +- skip/select 后输出是否符合预期 +- 预期失败文件是否返回明确错误 + +L2 的验收标准:每一个 schema compatibility rule 至少有一个真实 parquet 文件证明 reader 可以消费该 layout。 + +### L3: New Parquet Reader Integration UT + +位置:`be/test/format_v2/parquet/parquet_reader_test.cpp` + +职责:覆盖 file reader 层的组合行为,不重复 L1 的低层 def/rep 细节。 + +建议补充或保留以下组合: + +| 类别 | Case | +|---|---| +| Projection + predicate | `SELECT s.b WHERE s.a > x` 对应 file-local projection 与 predicate projection 合并 | +| Complex non-predicate select | predicate 过滤后,非谓词复杂列通过 selection vector 读取 | +| Row group/page pruning + complex projection | page index 缩小 row ranges 后,list/map/struct 输出行数和 offsets 正确 | +| Dictionary/statistics pruning | nested scalar leaf predicate 可 prune,但 repeated leaf 不做错误 aggregate/pruning | +| Delete predicate | delete predicate 和 query predicate 同时作用时 row position、selection、输出列一致 | +| Timestamp TZ | timestamp tz mapping 后 schema、read、min/max pushdown 一致 | +| Reopen split | 同一个 reader reopen 不残留 selection、cast、predicate projection、page skip state | + +L3 的验收标准:跨 reader state 的行为必须有 UT,尤其是 reopen、filter 后 selection、page skip 后 output column 不 double skip。 + +### L4: Table Reader And SQL Regression + +位置: + +- `be/test/format_v2/table_reader_test.cpp` +- `regression-test/suites/external_table_p*_parquet/` 或现有 parquet 外表相关目录 + +职责:覆盖用户可见行为和 FE/BE 接口组合,不在 regression 中验证 BE 内部 offset/nullmap 细节。 + +建议保留少量高价值 SQL regression: + +| 场景 | SQL 覆盖 | +|---|---| +| Legacy LIST/MAP 文件可读 | `SELECT *`, `SELECT nested_child`, `WHERE nested_child predicate` | +| Schema evolution | missing nested child with default, reordered/renamed nested field | +| Predicate pushdown 正确性 | row group/page pruning 开关开启时结果与关闭时一致 | +| Aggregate pushdown 正确性 | `count`, `min`, `max` 对 flat leaf 和 supported nested single leaf 正确;repeated leaf fallback | +| Iceberg/Paimon delete | delete vector / position delete / equality delete 与 parquet reader 组合结果正确 | + +L4 的验收标准:新增用户可见兼容能力时必须有 SQL regression;纯内部 refactor 不强制补 SQL regression,但需要 L0-L3 覆盖。 + +## 覆盖矩阵 + +下面的矩阵用于判断新改动应该补哪一层测试。 + +| 逻辑区域 | L0 Schema | L1 Def/Rep | L2 Corpus | L3 Reader | L4 SQL | +|---|---:|---:|---:|---:|---:| +| Parquet type mapping | 必须 | 不需要 | 可选 | 可选 | 可选 | +| LIST/MAP schema compatibility | 必须 | 可选 | 必须 | 可选 | 必须覆盖用户可见新增能力 | +| Bare repeated field | 必须 | 必须 | 必须 | 可选 | 可选 | +| List offsets/nullmap | 不足 | 必须 | 必须 | 必须 | 可选 | +| Map offsets/nullmap/key validation | 不足 | 必须 | 必须 | 必须 | 可选 | +| Projection pruning | 可选 | 可选 | 必须 | 必须 | 必须覆盖用户可见路径 | +| Predicate selection | 不需要 | 可选 | 可选 | 必须 | 必须覆盖关键路径 | +| Statistics/dictionary/page pruning | 不需要 | 不需要 | 可选 | 必须 | 结果一致性必须 | +| Aggregate pushdown | 不需要 | 不需要 | 可选 | 必须 | 必须 | +| Delete predicate / row position | 不需要 | 不需要 | 可选 | 必须 | Iceberg/Paimon 必须 | +| Error/corruption path | 必须覆盖 schema error | 必须覆盖 materialize error | 必须覆盖真实坏文件 | 可选 | 可选 | + +## 推荐优先级 + +### P0: 立即补齐的正确性保护 + +1. 为 legacy LIST schema 增加真实读取 corpus: + - repeated primitive list + - `_tuple` struct element + - repeated group with multiple children +2. 为 optional MAP key 增加两类真实读取: + - optional key 但所有 key 非 null,读取成功 + - optional key 且存在 null key,读取失败并包含 `contains null key` +3. 增加 fake def/rep level materializer UT: + - list null/empty/null element/multi element + - map null/empty/null value/multi entry/null key +4. 增加 skip/select 覆盖: + - legacy list corpus 上执行 skip/select + - map value list 或 list struct map list 上执行 select + +### P1: 组合路径保护 + +1. Projection + predicate 同时命中同一 nested struct 的不同 child。 +2. Page index pruning 后读取 complex output column,验证没有 double skip。 +3. Row group statistics/dictionary pruning 后从后续 row group 读取 nested column。 +4. Reopen split 后 predicate projection、selection vector、page skip plan 不残留。 + +### P2: 完整性和长期质量 + +1. 建立 `parquet_v2_compat` corpus README,记录文件生成方式、writer 版本、schema、预期行为。 +2. 对 changed files 定期跑 coverage,关注 branch coverage,不只看 line coverage。 +3. 对 schema resolver 增加 table-driven case,减少散落 assert。 +4. 对 materializer 增加 fuzz/property-style 小范围测试:随机生成合法 list/map rows,转换为 def/rep levels 后读回比较原始 logical rows。 + +## 测试数据构造建议 + +### 动态生成数据 + +适合: + +- Arrow 标准 schema +- row group/page size 控制 +- dictionary/plain/page index/statistics 行为 +- type mapping 常规 case + +优点是无需维护二进制文件,case 可读性高。 + +缺点是不能覆盖大量 legacy writer layout。 + +### Golden parquet 文件 + +适合: + +- Hive/Spark/parquet-mr legacy LIST/MAP schema +- Arrow writer 不容易生成的 converted annotation +- malformed-but-parseable 文件 +- 兼容性回归保护 + +要求: + +1. 文件尽量小,通常 3 到 20 行。 +2. 配套 README 说明生成命令、writer 版本、schema、逻辑数据。 +3. 不在 UT 中依赖外部网络或外部服务。 +4. 预期结果在 C++ UT 中直接断言,SQL regression 的 `.out` 仍由 regression 脚本生成。 + +### Fake reader 数据 + +适合: + +- def/rep level 边界 +- corruption path +- cursor/overflow 状态 +- non-nullable output 遇到 null + +要求: + +1. fake reader 只模拟 `ParquetColumnReader` 必需接口。 +2. 每个 case 明确输入 levels 和 expected logical rows。 +3. 错误 case 检查 `Status` 类型和关键错误文本。 + +## 验收标准 + +一个 new parquet reader 改动合入前,建议满足: + +1. 改动 schema resolver:至少补 L0;如果新增兼容能力,补 L2;如果用户可见,补 L4。 +2. 改动 list/map/struct reader:至少补 L1 和 L3;涉及 legacy layout 时补 L2。 +3. 改动 pruning/predicate/aggregate:至少补 L3;用户可见 SQL 语义补 L4。 +4. 改动 table reader mapping/schema evolution:至少补 `table_reader_test.cpp`,必要时补 L4。 +5. 新增 error handling:必须有负向 UT,不能只依赖代码审查。 + +推荐执行命令: + +```bash +./run-be-ut.sh --run '--filter=ParquetSchemaTest.*' +./run-be-ut.sh --run '--filter=ParquetColumnReaderTest.*:NewParquetReaderTest.*:ParquetScanTest.*' +./run-be-ut.sh --run '--filter=TableReaderTest.*' +``` + +对重要重构或发布前验证,建议执行: + +```bash +./run-be-ut.sh --run '--filter=Parquet*:*TableReaderTest*' --coverage +``` + +如果本地工具链无法执行 UT,需要在提交说明或 PR 中明确说明失败原因,并在 CI 或可用环境补跑。 + +## 不建议的方式 + +1. 不建议用更多 schema-only case 替代真实读取 case。schema 正确不等于 reader 正确。 +2. 不建议只用 Arrow writer 动态生成文件证明 compatibility。兼容性问题通常来自非 Arrow writer。 +3. 不建议把所有复杂类型组合塞进一个巨大 fixture 后只断言少量输出。失败定位困难,覆盖意图不清晰。 +4. 不建议把内部 def/rep level 边界全部放到 SQL regression。执行慢、定位差、难覆盖异常路径。 +5. 不建议用 100% line coverage 作为合入门槛。更合理的是 changed branch coverage + 风险矩阵覆盖。 + +## 最小落地计划 + +第一阶段只需要完成 P0: + +1. 新增 `parquet_nested_materializer_test.cpp`,覆盖 list/map def/rep 核心正常和异常路径。 +2. 新增 `be/test/exec/test_data/parquet_v2_compat/README.md` 和 4 到 6 个小型 golden parquet 文件。 +3. 新增 `parquet_compat_corpus_test.cpp`,对 golden 文件做 schema/full read/projection/skip/select 断言。 +4. 将现有 `parquet_schema_test.cpp` 中 LIST/MAP schema case 整理为 table-driven 或至少按类别分组。 + +完成第一阶段后,才能较有信心地说 new parquet reader 的关键逻辑有有效测试保护;否则当前 UT 只能证明主路径和部分 schema 分支,不能充分发现 legacy compatibility 和 complex materialization 的问题。 diff --git a/docs/parquet-list-map-compat-design.md b/docs/parquet-list-map-compat-design.md new file mode 100644 index 00000000000000..a02ca6e822aaf0 --- /dev/null +++ b/docs/parquet-list-map-compat-design.md @@ -0,0 +1,664 @@ +# Parquet LIST/MAP Compatibility Design + +本文描述如何参考 Arrow Parquet 的 LIST/MAP 兼容策略,在 Doris new parquet reader 中支持更多 Parquet 标准和 legacy 复杂类型 schema。 + +目标不是改变 `ListColumnReader` / `MapColumnReader` 的读取模型,而是在 schema 构建阶段把不同物理 schema 归一化成 Doris 当前 reader 可以消费的统一 `ParquetColumnSchema` tree。 + +## 背景 + +Parquet 的复杂类型是通过 group schema、logical/converted annotation、definition levels 和 repetition levels 共同表达的。 + +标准 LIST/MAP schema 比较明确,但历史 writer 产生过多种 legacy 形态。例如 LIST 可能缺少标准 `list.element` wrapper,MAP entry group 可能叫 `key_value`、`entries` 或其它名字。 + +Arrow C++ 的处理思路是: + +1. 在 Parquet schema conversion 阶段识别标准和 legacy schema。 +2. 将这些 schema 归一化为 Arrow `ListType` / `MapType` / `StructType`。 +3. 后续 reader 只消费归一化后的 nested field tree,不在读取阶段继续判断 legacy schema 名字。 + +Doris new parquet reader 应采用相同边界: + +1. `parquet_column_schema.cpp` 负责兼容不同 LIST/MAP physical schema。 +2. `ParquetColumnSchema` 输出统一的 LIST/MAP child tree。 +3. `ListColumnReader` / `MapColumnReader` / `ParquetLeafReader` 不感知 legacy schema 形态。 + +## 当前 Doris 限制 + +当前 `build_node_schema()` 的 LIST 分支只支持标准 3-level LIST: + +```text +optional group a (LIST) { + repeated group list { + optional int32 element; + } +} +``` + +当前限制: + +- outer LIST group 必须只有一个 child。 +- repeated child 必须是 group。 +- repeated group 必须只有一个 child。 +- 不支持 repeated primitive list。 +- 不支持 repeated group 多字段 struct element。 +- 不支持 `array` / `_tuple` 这类 legacy structural name。 + +当前 MAP 分支支持标准 MAP 结构: + +```text +optional group m (MAP) { + repeated group key_value { + required binary key; + optional int32 value; + } +} +``` + +当前限制: + +- outer MAP group 必须只有一个 child。 +- entry child 必须 repeated group。 +- entry group 必须正好两个 children。 +- key 必须 required。 +- 不支持 key-only map。 +- 不支持没有 repeated entry layer 的非标准 MAP。 + +## 设计原则 + +1. 兼容逻辑只放在 schema 构建阶段。 +2. reader 层继续消费统一 schema tree。 +3. 不支持会改变 reader model 的格式,例如没有 repeated entry layer 的 MAP。 +4. 第一阶段不支持 key-only map,因为 Doris `ColumnMap` 需要 values column。 +5. 对容易误判的 schema 保持严格,避免把普通 struct 错解析成 LIST/MAP。 +6. 支持范围对齐 Arrow 的稳定 legacy compatibility 规则,而不是无限放宽。 + +MAP projection 语义也保持收敛: + +- partial MAP projection 只表示 value subtree pruning,例如 `MAP>` 投影 `value.b` 后输出 `MAP>`。 +- key 不作为可裁剪 projection 子树。reader 始终读取完整 key stream,因为 key stream 决定 entry existence、offsets,并且 key 本身承载 MAP 的 key equality 语义。 +- schema projection 重建 `DataTypeMap` 时保留原始 key type,只根据 projected value child 重建 value type。 + +## LIST 兼容规则 + +对于 outer group annotated as `LIST`: + +```text +optional group a (LIST) { + repeated ... repeated_child; +} +``` + +先要求: + +- outer LIST group 必须只有一个 child。 +- child 必须是 repeated。 + +然后根据 repeated child 形态判断 element schema node。 + +### 1. 标准 3-level LIST + +```text +optional group a (LIST) { + repeated group list { + optional int32 element; + } +} +``` + +解析: + +- repeated child 是 wrapper。 +- element 是 wrapper 的唯一 child:`list.element`。 +- `ParquetColumnSchema(LIST).children[0]` 指向 element schema。 + +### 2. Repeated primitive legacy LIST + +```text +optional group a (LIST) { + repeated int32 element; +} +``` + +解析: + +- repeated primitive 本身是 element。 +- element 本身不 nullable,因为 repeated primitive 不提供额外 optional element level。 +- array 自身 nullable 仍由 outer LIST group 决定。 + +### 3. Repeated group as struct element + +```text +optional group a (LIST) { + repeated group element { + optional int32 x; + optional binary y; + } +} +``` + +解析: + +- repeated group 有多个 children。 +- repeated group 本身是 element。 +- element type 是 `STRUCT`。 + +### 4. Legacy structural name + +Arrow 会将某些名字视作 structural element,而不是标准 wrapper。 + +```text +optional group a (LIST) { + repeated group array { + optional int32 item; + } +} +``` + +```text +optional group a (LIST) { + repeated group a_tuple { + optional int32 item; + } +} +``` + +解析: + +- repeated group 名为 `array`,或名为 `_tuple`。 +- repeated group 本身是 element。 +- 即使它只有一个 child,也不要剥掉这一层。 + +### 5. One-child repeated group wrapper + +```text +optional group a (LIST) { + repeated group list { + optional int32 element; + } +} +``` + +如果 repeated group 只有一个 child,且不是 legacy structural name,则按 wrapper 处理: + +- element 是 repeated group 的唯一 child。 + +但这里不能只按 child 数量判断。需要额外保持 Arrow / parquet-format 的 backward compatibility 规则: + +- 如果 repeated group 自身带 `LIST` 或 `MAP` annotation,则 repeated group 本身是 element,不剥 wrapper。 +- 如果 repeated group 的唯一 child 也是 repeated,则 repeated group 本身是 element,不剥 wrapper。 +- 只有当 repeated group 无 logical annotation、唯一 child 非 repeated、且不是 legacy structural name 时,才把它当作标准 wrapper 剥掉。 + +这样可以避免把 two-level `List>`、two-level `List>` 或单字段 repeated struct element 错解析成少一层的结构。 + +## LIST schema resolver + +建议在 `parquet_column_schema.cpp` 中新增 helper: + +```cpp +struct ListElementResolution { + const parquet::schema::Node* repeated_node = nullptr; + const parquet::schema::Node* element_node = nullptr; + SchemaBuildContext repeated_context; + SchemaBuildContext element_context; + bool element_is_repeated_node = false; +}; + +Status resolve_list_element_node( + const parquet::SchemaDescriptor& schema, + const parquet::schema::GroupNode& list_group, + const SchemaBuildContext& list_context, + ListElementResolution* result); +``` + +Resolver 逻辑: + +```text +if list_group.field_count != 1: + reject + +repeated_node = list_group.field(0) +if !repeated_node.is_repeated: + reject + +repeated_context = child_context(list_context, repeated_node, 0) + +if repeated_node.is_primitive: + element_node = repeated_node + element_context = repeated_context + element_is_repeated_node = true + return + +repeated_group = as_group(repeated_node) +if repeated_group.field_count == 0: + reject + +if repeated_group.field_count > 1: + element_node = repeated_node + element_context = repeated_context + element_is_repeated_node = true + return + +if has_structural_list_name(list_group.name, repeated_group.name): + element_node = repeated_node + element_context = repeated_context + element_is_repeated_node = true + return + +if repeated_group has LIST or MAP annotation: + element_node = repeated_node + element_context = repeated_context + element_is_repeated_node = true + return + +only_child = repeated_group.field(0) +if only_child.is_repeated: + element_node = repeated_node + element_context = repeated_context + element_is_repeated_node = true + return + +element_node = only_child +element_context = child_context(repeated_context, only_child, 0) +element_is_repeated_node = false +``` + +`has_structural_list_name()` 对齐 Arrow 的 legacy rule: + +```text +name == "array" || name == list_name + "_tuple" +``` + +## LIST schema build + +`build_node_schema()` 的 LIST 分支改为: + +```text +resolve_list_element_node(...) + +column_schema.kind = LIST +column_schema.definition_level = repeated_context.definition_level +column_schema.repetition_level = repeated_context.repetition_level +column_schema.repeated_repetition_level = repeated_context.repeated_repetition_level + +build child schema from resolved element_node and element_context +column_schema.type = nullable_if_needed(DataTypeArray(child.type), list_node) +column_schema.children = [child] +propagate_child_levels(column_schema) +``` + +### repeated group itself as element + +当 element 是 repeated group 本身时,需要注意不要把这个 repeated group 再解释成一层 LIST。 + +预期效果: + +```text +optional group a (LIST) { + repeated group element { + optional int32 x; + optional binary y; + } +} +``` + +应构造成: + +```text +LIST + child: STRUCT +``` + +而不是: + +```text +LIST + child: LIST or extra repeated container +``` + +实现上可以新增一个 internal build mode: + +```cpp +enum class SchemaBuildMode { + NORMAL, + REPEATED_GROUP_AS_LIST_ELEMENT, +}; +``` + +当 mode 是 `REPEATED_GROUP_AS_LIST_ELEMENT`: + +- 当前 repeated group 作为 element 本身构造成 STRUCT 或 annotated logical type。 +- 它的 repeated level 已经由 list entry 层消费,不再把 repeated 当作额外 array 层。 +- 如果当前 repeated group 是普通 group,则构造成 `STRUCT` element。 +- 如果当前 repeated group 带 `LIST` annotation,则继续按 LIST 解析它的 child repeated layer,构造成 nested list element。 +- 如果当前 repeated group 带 `MAP` 或 `MAP_KEY_VALUE` annotation,则继续按 MAP 解析它的 child repeated entry layer,构造成 map element。 +- 构造当前 element schema 时,不得再次因为“当前节点本身是 repeated”引入隐式 list;只有它内部的 child repeated layer 才能产生下一层 list/map repetition 语义。 + +如果希望保持改动更小,也可以新增专用函数: + +```cpp +Status build_repeated_group_as_list_element_schema(...); +``` + +该函数至少需要处理 repeated group 作为普通 struct element 的场景;如果选择不用通用 build mode,则还需要显式覆盖 repeated group annotated as LIST/MAP 的场景。 + +## MAP 兼容规则 + +对于 outer group annotated as `MAP` 或 legacy `MAP_KEY_VALUE`: + +```text +optional group m (MAP) { + repeated group entries { + required binary key; + optional int32 value; + } +} +``` + +支持: + +- 只有 outer group 带 `MAP` / `MAP_KEY_VALUE` annotation 时,才进入 MAP 兼容解析。 +- entry group 名字可以是 `key_value`、`entries` 或其它。 +- key/value 字段名不强制必须叫 `key` / `value`。 +- 第一个 child 是 key。 +- 第二个 child 是 value。 +- key 必须 required。 +- value 可以 required 或 optional。 + +不支持: + +- outer MAP group 多个 children。 +- entry child 非 repeated。 +- entry child 是 primitive。 +- entry group 没有 value,即 key-only map。 +- 没有 repeated entry layer 的 MAP。 +- nullable key。 + +## MAP schema resolver + +建议新增 helper: + +```cpp +struct MapEntryResolution { + const parquet::schema::GroupNode* entry_group = nullptr; + SchemaBuildContext entry_context; +}; + +Status resolve_map_entry_group( + const parquet::schema::GroupNode& map_group, + const SchemaBuildContext& map_context, + MapEntryResolution* result); +``` + +Resolver 逻辑: + +```text +if map_group.field_count != 1: + reject + +entry_node = map_group.field(0) +if !entry_node.is_repeated: + reject +if entry_node.is_primitive: + reject + +entry_group = as_group(entry_node) +if entry_group.field_count != 2: + reject + +key_node = entry_group.field(0) +value_node = entry_group.field(1) +if key_node.repetition != REQUIRED: + reject + +entry_context = child_context(map_context, entry_node, 0) +return +``` + +## MAP schema build + +`build_node_schema()` 的 MAP 分支应和 LIST 一样在 schema 构建阶段折叠物理 wrapper。 +`key_value` / `entries` / 任意合法 entry group 只用于解析 repeated entry level,不出现在 +最终 `ParquetColumnSchema.children` 中: + +```text +MAP + child[0]: key + child[1]: value +``` + +构造流程: + +```text +resolve_map_entry_group(...) + +column_schema.kind = MAP +column_schema.definition_level = entry_context.definition_level +column_schema.repetition_level = entry_context.repetition_level +column_schema.repeated_repetition_level = entry_context.repeated_repetition_level + +build key child from entry_group.field(0) +build value child from entry_group.field(1) + +column_schema.type = nullable_if_needed(DataTypeMap(nullable(key.type), nullable(value.type)), map_node) +column_schema.children = [key_schema, value_schema] +propagate_child_levels(column_schema) +``` + +这里保持 `MapColumnReader` 的直接 key/value 假设: + +- `column_schema.children[0]` 是 key。 +- `column_schema.children[1]` 是 value。 +- MAP node 自身保存 entry repeated group 的 `definition_level` / `repetition_level` / + `repeated_repetition_level`,用于 materialize offsets、null map 和 empty map。 + +注意:`DataTypeMap` 中把 key type 包成 nullable 是 Doris nested column materialization 的内部类型约定,不代表 Parquet nullable key 被支持。Schema resolver 仍必须在 `key_node.repetition != REQUIRED` 时 reject。 + +## 不支持 key-only map 的原因 + +Key-only map 可能长这样: + +```text +optional group m (MAP) { + repeated group entries { + required binary key; + } +} +``` + +理论上可以解释为 set-like map 或 `MAP`,但 Doris `ColumnMap` 需要 keys column 和 values column。 + +若要支持,需要额外设计: + +- synthetic null value schema。 +- constant-null value reader。 +- `MapColumnReader` value stream 缺失时的特殊路径。 + +这会改变 reader tree,不属于本次 schema compatibility 的最小范围。因此第一阶段明确 reject。 + +## 不支持 no-entry MAP 的原因 + +No-entry MAP 可能长这样: + +```text +optional group m (MAP) { + required binary key; + optional int32 value; +} +``` + +它缺少 repeated entry layer,因此没有 repetition level 可以表达多个 map entries,也无法生成 Doris `ColumnMap` offsets。 + +这不是标准 MAP,也不是 Arrow 主要兼容的 legacy 形态。第一阶段应 reject。 + +## 对 reader 层的影响 + +预期不修改 reader 层核心逻辑。 + +保持: + +- `ListColumnReader` 只读取 `column_schema.children[0]` 作为 element reader。 +- `MapColumnReader` 读取 `column_schema.children[0/1]` 作为 key/value reader。 +- `MapColumnReader` 对 partial MAP projection 只接受 value child projection,显式 key child projection 应 reject;即使只裁剪 value,reader 也必须完整读取 key stream。 +- `ParquetLeafReader` 只负责 leaf records/levels/values 读取和 batch materialization。 +- `nested_column_materializer.*` 只负责 Doris nested Column 构造 helper。 + +风险点在 LIST repeated group as element: + +- 如果该 repeated group 是 struct element,需要确保 schema builder 不把 repeated group 再解释成一个额外 repeated container。 +- 这个风险应通过专用 build mode 或专用 helper 解决。 + +## 错误处理策略 + +错误信息应明确指出具体 unsupported schema 原因: + +- LIST outer group child count invalid。 +- LIST child is not repeated。 +- LIST repeated group has no child。 +- MAP outer group child count invalid。 +- MAP entry is not repeated group。 +- MAP entry child count is not 2。 +- MAP key is nullable。 + +不要用过于笼统的 `Unsupported parquet LIST encoding` 覆盖所有错误,否则后续排查文件兼容性问题会困难。 + +## 测试计划 + +### LIST 正例 + +1. 标准 3-level LIST: + +```text +optional group a (LIST) { + repeated group list { + optional int32 element; + } +} +``` + +2. Repeated primitive legacy LIST: + +```text +optional group a (LIST) { + repeated int32 element; +} +``` + +3. Repeated group struct element: + +```text +optional group a (LIST) { + repeated group element { + optional int32 x; + optional binary y; + } +} +``` + +4. Legacy `array` name: + +```text +optional group a (LIST) { + repeated group array { + optional int32 item; + } +} +``` + +5. Legacy `_tuple` name: + +```text +optional group a (LIST) { + repeated group a_tuple { + optional int32 item; + } +} +``` + +6. Repeated group annotated as nested LIST: + +```text +optional group a (LIST) { + repeated group array (LIST) { + repeated int32 array; + } +} +``` + +预期解析为 `ARRAY>`,不要剥掉 `array (LIST)` 这一层。 + +7. Repeated group annotated as MAP: + +```text +optional group a (LIST) { + repeated group array (MAP) { + repeated group key_value { + required binary key; + optional int32 value; + } + } +} +``` + +预期解析为 `ARRAY>`,不要剥掉 `array (MAP)` 这一层。 + +8. One-child repeated group whose child is repeated: + +```text +optional group a (LIST) { + repeated group element { + repeated int32 items; + } +} +``` + +预期 repeated group 本身是 struct element,解析为 `ARRAY>>`,不要把 `items` 提升成 list element。 + +### LIST 反例 + +1. outer LIST group 多 child。 +2. outer LIST child 非 repeated。 +3. repeated group 无 child。 +4. repeated LIST-annotated outer group,除非它作为 another two-level LIST 的 element 被专门支持。 + +### MAP 正例 + +1. 标准 `key_value` entry group。 +2. `entries` entry group name。 +3. entry group 任意名字,但结构为 repeated group with required key and value。 +4. `MAP_KEY_VALUE` legacy converted type。 +5. key/value 字段名非 `key`/`value`,但位置正确。 + +### MAP 反例 + +1. nullable key。 +2. outer MAP group 多 child。 +3. entry child 非 repeated。 +4. entry child 是 primitive。 +5. key-only map。 +6. no-entry MAP。 + +## 实施步骤 + +1. 在 `parquet_column_schema.cpp` 增加 LIST helper: + - `has_structural_list_name()` + - `resolve_list_element_node()` + - 必要时增加 repeated group as element 的 build helper。 +2. 改造 LIST 分支,输出统一 `ParquetColumnSchemaKind::LIST` schema tree。 +3. 增加 LIST schema/unit/regression 测试。 + - 覆盖 repeated primitive、multi-field struct element、`array` / `_tuple` structural name。 + - 覆盖 two-level `List>`、two-level `List>`、单 child repeated group 且 child repeated 的 struct element。 + - read 测试至少覆盖 null list、empty list、单元素、多元素,验证 def/rep materialization。 +4. 增加 MAP helper: + - `resolve_map_entry_group()` +5. 改造 MAP 分支,放宽 entry group 名字限制,但保持 key/value 结构严格,并在 schema build 阶段折叠 entry wrapper,输出 `MAP -> key,value`。 +6. 增加 MAP schema/unit/regression 测试。 + - 覆盖 entry group 名字兼容。 + - 覆盖 `ParquetColumnSchema(MAP).children == [key, value]`。 + - 覆盖 partial MAP projection 只允许 value child,key child projection reject。 +7. 如后续确有需求,再单独设计 key-only map 或 key subtree projection 支持。 + +## 预期收益 + +- 支持更多由 Arrow、Spark、Hive、旧 Parquet writer 产生的 LIST/MAP schema。 +- 兼容逻辑集中在 schema builder,reader 层保持稳定。 +- 为后续 complex parquet reader 的兼容性测试建立清晰边界。 diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index b40ff54fbd829c..cdb3d1a7ed06d7 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -24,6 +24,7 @@ import org.apache.doris.common.security.authentication.PreExecutionAuthenticatorCache; import com.google.common.base.Preconditions; +import org.apache.paimon.CoreOptions; import org.apache.paimon.data.InternalRow; import org.apache.paimon.predicate.Predicate; import org.apache.paimon.reader.RecordReader; @@ -37,6 +38,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.TimeZone; @@ -234,6 +236,8 @@ protected TableSchema parseTableSchema() throws UnsupportedOperationException { private void initTable() { Preconditions.checkState(params.containsKey("serialized_table")); table = PaimonUtils.deserialize(params.get("serialized_table")); + table = table.copy(Collections.singletonMap( + CoreOptions.READ_BATCH_SIZE.key(), String.valueOf(batchSize))); paimonAllFieldNames = PaimonUtils.getFieldNames(this.table.rowType()); if (LOG.isDebugEnabled()) { LOG.debug("paimonAllFieldNames:{}", paimonAllFieldNames); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 1d234377d83251..e1311237a603d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -1253,6 +1253,9 @@ public Map getSupportedSysTables() { public TFileFormatType getFileFormatType(SessionVariable sessionVariable) throws UserException { TFileFormatType type = null; Table table = getRemoteTable(); + // now hive self only support mixed with orc/parquet files in table and different partitions + // But if mixed with orc/parquet files in table and same partition, will failed when read. + // now here hive used table format, so BE will regrard all files in table is same format. String inputFormatName = table.getSd().getInputFormat(); String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName); if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 17a742b835a4fb..27698c2d1f9700 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -47,6 +47,7 @@ import org.apache.doris.thrift.TFileRangeDesc; import org.apache.doris.thrift.TPaimonDeletionFileDesc; import org.apache.doris.thrift.TPaimonFileDesc; +import org.apache.doris.thrift.TPaimonReaderType; import org.apache.doris.thrift.TPushAggOp; import org.apache.doris.thrift.TTableFormatFileDesc; @@ -262,8 +263,10 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) rangeDesc.setFormatType(TFileFormatType.FORMAT_JNI); // Use Paimon native serialization for paimon-cpp reader if (sessionVariable.isEnablePaimonCppReader() && split instanceof DataSplit) { + fileDesc.setReaderType(TPaimonReaderType.PAIMON_CPP); fileDesc.setPaimonSplit(PaimonUtil.encodeDataSplitToString((DataSplit) split)); } else { + fileDesc.setReaderType(TPaimonReaderType.PAIMON_JNI); fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split)); } // Set table location for paimon-cpp reader @@ -274,6 +277,7 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) rangeDesc.setSelfSplitWeight(paimonSplit.getSelfSplitWeight()); } else { // use native reader + fileDesc.setReaderType(TPaimonReaderType.PAIMON_NATIVE); if (fileFormat.equals("orc")) { rangeDesc.setFormatType(TFileFormatType.FORMAT_ORC); } else if (fileFormat.equals("parquet")) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 845030a37c163a..77813e2d62b75f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -96,6 +96,7 @@ public class SessionVariable implements Serializable, Writable { public static final String SCAN_QUEUE_MEM_LIMIT = "scan_queue_mem_limit"; public static final String MAX_SCANNERS_CONCURRENCY = "max_scanners_concurrency"; public static final String MAX_FILE_SCANNERS_CONCURRENCY = "max_file_scanners_concurrency"; + public static final String ENABLE_FILE_SCANNER_V2 = "enable_file_scanner_v2"; public static final String MIN_SCANNERS_CONCURRENCY = "min_scanners_concurrency"; public static final String MIN_FILE_SCANNERS_CONCURRENCY = "min_file_scanners_concurrency"; public static final String MIN_SCAN_SCHEDULER_CONCURRENCY = "min_scan_scheduler_concurrency"; @@ -1146,6 +1147,11 @@ public static double getHotValueThreshold() { "FileScanNode 扫描数据的最大并发,默认为 16", "The max threads to read data of FileScanNode, default 16"}) public int maxFileScannersConcurrency = 16; + @VarAttrDef.VarAttr(name = ENABLE_FILE_SCANNER_V2, needForward = true, description = { + "开启后 FileScanNode 会在支持的查询场景使用 FileScannerV2,默认开启", + "When enabled, FileScanNode uses FileScannerV2 for supported query scans. Enabled by default."}) + public boolean enableFileScannerV2 = true; + @VarAttrDef.VarAttr(name = LOCAL_EXCHANGE_FREE_BLOCKS_LIMIT) public int localExchangeFreeBlocksLimit = 4; @@ -2987,10 +2993,9 @@ public static boolean isEagerAggregationOnJoin() { public static final String ENABLE_MC_LIMIT_SPLIT_OPTIMIZATION = "enable_mc_limit_split_optimization"; @VarAttrDef.VarAttr( name = ENABLE_EXTERNAL_TABLE_BATCH_MODE, - fuzzy = true, description = {"使能外表的 batch mode 功能", "Enable the batch mode function of the external table."}, needForward = true) - public boolean enableExternalTableBatchMode = true; + public boolean enableExternalTableBatchMode = false; @VarAttrDef.VarAttr( name = ENABLE_MC_LIMIT_SPLIT_OPTIMIZATION, @@ -3954,13 +3959,6 @@ private void setFuzzyForCatalog(Random random) { this.hiveTextCompression = Util.getRandomString( "gzip", "defalte", "bzip2", "zstd", "lz4", "lzo", "snappy", "plain"); - // batch mode - this.enableExternalTableBatchMode = random.nextBoolean(); - if (this.enableExternalTableBatchMode) { - this.numPartitionsInBatchMode = Util.getRandomInt(0, 1024, Integer.MAX_VALUE); - this.numFilesInBatchMode = Util.getRandomInt(0, 1024, Integer.MAX_VALUE); - } - // common this.enableCountPushDownForExternalTable = random.nextBoolean(); } @@ -5552,6 +5550,7 @@ public TQueryOptions toThrift() { tResult.setScanQueueMemLimit(maxScanQueueMemByte); tResult.setMaxScannersConcurrency(maxScannersConcurrency); tResult.setMaxFileScannersConcurrency(maxFileScannersConcurrency); + tResult.setEnableFileScannerV2(enableFileScannerV2); tResult.setMaxColumnReaderNum(maxColumnReaderNum); tResult.setParallelPrepareThreshold(parallelPrepareThreshold); tResult.setMinScannersConcurrency(minScannersConcurrency); diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java index 370e4965765854..4d140b2ba57037 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java @@ -47,6 +47,7 @@ public void testAnalyzeFileFormatProperties() { Assert.assertEquals(TParquetCompressionType.SNAPPY, parquetFileFormatProperties.getParquetCompressionType()); Assert.assertEquals(false, parquetFileFormatProperties.isParquetDisableDictionary()); + Assert.assertTrue(parquetFileFormatProperties.isEnableInt96Timestamps()); } @Test @@ -139,6 +140,7 @@ public void testFullTResultFileSinkOptions() { parquetFileFormatProperties.fullTResultFileSinkOptions(sinkOptions); Assert.assertEquals(parquetFileFormatProperties.getParquetCompressionType(), sinkOptions.getParquetCompressionType()); Assert.assertEquals(parquetFileFormatProperties.isParquetDisableDictionary(), sinkOptions.isParquetDisableDictionary()); + Assert.assertEquals(parquetFileFormatProperties.isEnableInt96Timestamps(), sinkOptions.isEnableInt96Timestamps()); } @Test diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index c17199d74edf91..a17cd140c93418 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -88,6 +88,10 @@ enum TExprNodeType { TRY_CAST_EXPR = 41 // for search DSL function SEARCH_EXPR = 42, + // Normal predicate expression + PREDICATE = 43, + // Normal literal + LITERAL = 44, } //enum TAggregationOp { diff --git a/gensrc/thrift/Opcodes.thrift b/gensrc/thrift/Opcodes.thrift index 1e4002357e7599..a2d709799482eb 100644 --- a/gensrc/thrift/Opcodes.thrift +++ b/gensrc/thrift/Opcodes.thrift @@ -97,4 +97,6 @@ enum TExprOpcode { MATCH_REGEXP = 76, MATCH_PHRASE_EDGE = 77, TRY_CAST = 78, + // Delete operator from Iceberg/Paimon + DELETE = 79, } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index cd2292ca6b63c3..7b628a70e6982d 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -509,6 +509,7 @@ struct TQueryOptions { // In read path, read from file cache or remote storage when execute query. 1000: optional bool disable_file_cache = false 1001: optional i32 file_cache_query_limit_percent = -1 + 1002: optional bool enable_file_scanner_v2 = false } diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index fb8ef30150e27a..acd0e3975bbd8b 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -355,6 +355,12 @@ struct TPaimonDeletionFileDesc { 3: optional i64 length; } +enum TPaimonReaderType { + PAIMON_NATIVE = 0, + PAIMON_JNI = 1, + PAIMON_CPP = 2, +} + struct TPaimonFileDesc { 1: optional string paimon_split 2: optional string paimon_column_names @@ -372,6 +378,8 @@ struct TPaimonFileDesc { 14: optional string paimon_table // deprecated 15: optional i64 row_count // deprecated 16: optional i64 schema_id; // for schema change. + // Reader implementation for logical paimon split. Native file split uses range format type. + 17: optional TPaimonReaderType reader_type; } struct TTrinoConnectorFileDesc { diff --git a/regression-test/data/export_p0/export/test_show_export.out b/regression-test/data/export_p0/export/test_show_export.out index 90277ca28f2a9f..eb2d2ab154b1b3 100644 --- a/regression-test/data/export_p0/export/test_show_export.out +++ b/regression-test/data/export_p0/export/test_show_export.out @@ -102,156 +102,156 @@ 99 2017-10-01 2017-10-01T00:00 Beijing 99 99 true 99 99 99 99.99 99.99 char99 99 -- !select_load1 -- -1 2017-10-01 2017-10-01T00:00 Beijing 1 1 true 1 1 1.1 1.1 char1 1 1 -10 2017-10-01 2017-10-01T00:00 Beijing 10 10 true 10 10 10.1 10.1 char10 10 10 +1 2017-10-01 2017-10-01T00:00 Beijing 1 1 true 1 1 1.1 1.1 char1 1.000000000 1 +10 2017-10-01 2017-10-01T00:00 Beijing 10 10 true 10 10 10.1 10.1 char10 10.000000000 10 100 2017-10-01 2017-10-01T00:00 \N \N \N \N \N \N \N \N \N \N \N -11 2017-10-01 2017-10-01T00:00 Beijing 11 11 true 11 11 11.11 11.11 char11 11 11 -12 2017-10-01 2017-10-01T00:00 Beijing 12 12 true 12 12 12.12 12.12 char12 12 12 -13 2017-10-01 2017-10-01T00:00 Beijing 13 13 true 13 13 13.13 13.13 char13 13 13 -14 2017-10-01 2017-10-01T00:00 Beijing 14 14 true 14 14 14.14 14.14 char14 14 14 -15 2017-10-01 2017-10-01T00:00 Beijing 15 15 true 15 15 15.15 15.15 char15 15 15 -16 2017-10-01 2017-10-01T00:00 Beijing 16 16 true 16 16 16.16 16.16 char16 16 16 -17 2017-10-01 2017-10-01T00:00 Beijing 17 17 true 17 17 17.17 17.17 char17 17 17 -18 2017-10-01 2017-10-01T00:00 Beijing 18 18 true 18 18 18.18 18.18 char18 18 18 -19 2017-10-01 2017-10-01T00:00 Beijing 19 19 true 19 19 19.19 19.19 char19 19 19 -2 2017-10-01 2017-10-01T00:00 Beijing 2 2 true 2 2 2.2 2.2 char2 2 2 -20 2017-10-01 2017-10-01T00:00 Beijing 20 20 true 20 20 20.2 20.2 char20 20 20 -21 2017-10-01 2017-10-01T00:00 Beijing 21 21 true 21 21 21.21 21.21 char21 21 21 -22 2017-10-01 2017-10-01T00:00 Beijing 22 22 true 22 22 22.22 22.22 char22 22 22 -23 2017-10-01 2017-10-01T00:00 Beijing 23 23 true 23 23 23.23 23.23 char23 23 23 -24 2017-10-01 2017-10-01T00:00 Beijing 24 24 true 24 24 24.24 24.24 char24 24 24 -25 2017-10-01 2017-10-01T00:00 Beijing 25 25 true 25 25 25.25 25.25 char25 25 25 -26 2017-10-01 2017-10-01T00:00 Beijing 26 26 true 26 26 26.26 26.26 char26 26 26 -27 2017-10-01 2017-10-01T00:00 Beijing 27 27 true 27 27 27.27 27.27 char27 27 27 -28 2017-10-01 2017-10-01T00:00 Beijing 28 28 true 28 28 28.28 28.28 char28 28 28 -29 2017-10-01 2017-10-01T00:00 Beijing 29 29 true 29 29 29.29 29.29 char29 29 29 -3 2017-10-01 2017-10-01T00:00 Beijing 3 3 true 3 3 3.3 3.3 char3 3 3 -30 2017-10-01 2017-10-01T00:00 Beijing 30 30 true 30 30 30.3 30.3 char30 30 30 -31 2017-10-01 2017-10-01T00:00 Beijing 31 31 true 31 31 31.31 31.31 char31 31 31 -32 2017-10-01 2017-10-01T00:00 Beijing 32 32 true 32 32 32.32 32.32 char32 32 32 -33 2017-10-01 2017-10-01T00:00 Beijing 33 33 true 33 33 33.33 33.33 char33 33 33 -34 2017-10-01 2017-10-01T00:00 Beijing 34 34 true 34 34 34.34 34.34 char34 34 34 -35 2017-10-01 2017-10-01T00:00 Beijing 35 35 true 35 35 35.35 35.35 char35 35 35 -36 2017-10-01 2017-10-01T00:00 Beijing 36 36 true 36 36 36.36 36.36 char36 36 36 -37 2017-10-01 2017-10-01T00:00 Beijing 37 37 true 37 37 37.37 37.37 char37 37 37 -38 2017-10-01 2017-10-01T00:00 Beijing 38 38 true 38 38 38.38 38.38 char38 38 38 -39 2017-10-01 2017-10-01T00:00 Beijing 39 39 true 39 39 39.39 39.39 char39 39 39 -4 2017-10-01 2017-10-01T00:00 Beijing 4 4 true 4 4 4.4 4.4 char4 4 4 -40 2017-10-01 2017-10-01T00:00 Beijing 40 40 true 40 40 40.4 40.4 char40 40 40 -41 2017-10-01 2017-10-01T00:00 Beijing 41 41 true 41 41 41.41 41.41 char41 41 41 -42 2017-10-01 2017-10-01T00:00 Beijing 42 42 true 42 42 42.42 42.42 char42 42 42 -43 2017-10-01 2017-10-01T00:00 Beijing 43 43 true 43 43 43.43 43.43 char43 43 43 -44 2017-10-01 2017-10-01T00:00 Beijing 44 44 true 44 44 44.44 44.44 char44 44 44 -45 2017-10-01 2017-10-01T00:00 Beijing 45 45 true 45 45 45.45 45.45 char45 45 45 -46 2017-10-01 2017-10-01T00:00 Beijing 46 46 true 46 46 46.46 46.46 char46 46 46 -47 2017-10-01 2017-10-01T00:00 Beijing 47 47 true 47 47 47.47 47.47 char47 47 47 -48 2017-10-01 2017-10-01T00:00 Beijing 48 48 true 48 48 48.48 48.48 char48 48 48 -49 2017-10-01 2017-10-01T00:00 Beijing 49 49 true 49 49 49.49 49.49 char49 49 49 -5 2017-10-01 2017-10-01T00:00 Beijing 5 5 true 5 5 5.5 5.5 char5 5 5 -50 2017-10-01 2017-10-01T00:00 Beijing 50 50 true 50 50 50.5 50.5 char50 50 50 -51 2017-10-01 2017-10-01T00:00 Beijing 51 51 true 51 51 51.51 51.51 char51 51 51 -52 2017-10-01 2017-10-01T00:00 Beijing 52 52 true 52 52 52.52 52.52 char52 52 52 -53 2017-10-01 2017-10-01T00:00 Beijing 53 53 true 53 53 53.53 53.53 char53 53 53 -54 2017-10-01 2017-10-01T00:00 Beijing 54 54 true 54 54 54.54 54.54 char54 54 54 -55 2017-10-01 2017-10-01T00:00 Beijing 55 55 true 55 55 55.55 55.55 char55 55 55 -56 2017-10-01 2017-10-01T00:00 Beijing 56 56 true 56 56 56.56 56.56 char56 56 56 -57 2017-10-01 2017-10-01T00:00 Beijing 57 57 true 57 57 57.57 57.57 char57 57 57 -58 2017-10-01 2017-10-01T00:00 Beijing 58 58 true 58 58 58.58 58.58 char58 58 58 -59 2017-10-01 2017-10-01T00:00 Beijing 59 59 true 59 59 59.59 59.59 char59 59 59 -6 2017-10-01 2017-10-01T00:00 Beijing 6 6 true 6 6 6.6 6.6 char6 6 6 -60 2017-10-01 2017-10-01T00:00 Beijing 60 60 true 60 60 60.6 60.6 char60 60 60 -61 2017-10-01 2017-10-01T00:00 Beijing 61 61 true 61 61 61.61 61.61 char61 61 61 -62 2017-10-01 2017-10-01T00:00 Beijing 62 62 true 62 62 62.62 62.62 char62 62 62 -63 2017-10-01 2017-10-01T00:00 Beijing 63 63 true 63 63 63.63 63.63 char63 63 63 -64 2017-10-01 2017-10-01T00:00 Beijing 64 64 true 64 64 64.64 64.64 char64 64 64 -65 2017-10-01 2017-10-01T00:00 Beijing 65 65 true 65 65 65.65 65.65 char65 65 65 -66 2017-10-01 2017-10-01T00:00 Beijing 66 66 true 66 66 66.66 66.66 char66 66 66 -67 2017-10-01 2017-10-01T00:00 Beijing 67 67 true 67 67 67.67 67.67 char67 67 67 -68 2017-10-01 2017-10-01T00:00 Beijing 68 68 true 68 68 68.68 68.68 char68 68 68 -69 2017-10-01 2017-10-01T00:00 Beijing 69 69 true 69 69 69.69 69.69 char69 69 69 -7 2017-10-01 2017-10-01T00:00 Beijing 7 7 true 7 7 7.7 7.7 char7 7 7 -70 2017-10-01 2017-10-01T00:00 Beijing 70 70 true 70 70 70.7 70.7 char70 70 70 -71 2017-10-01 2017-10-01T00:00 Beijing 71 71 true 71 71 71.71 71.71 char71 71 71 -72 2017-10-01 2017-10-01T00:00 Beijing 72 72 true 72 72 72.72 72.72 char72 72 72 -73 2017-10-01 2017-10-01T00:00 Beijing 73 73 true 73 73 73.73 73.73 char73 73 73 -74 2017-10-01 2017-10-01T00:00 Beijing 74 74 true 74 74 74.74 74.74 char74 74 74 -75 2017-10-01 2017-10-01T00:00 Beijing 75 75 true 75 75 75.75 75.75 char75 75 75 -76 2017-10-01 2017-10-01T00:00 Beijing 76 76 true 76 76 76.76 76.76 char76 76 76 -77 2017-10-01 2017-10-01T00:00 Beijing 77 77 true 77 77 77.77 77.77 char77 77 77 -78 2017-10-01 2017-10-01T00:00 Beijing 78 78 true 78 78 78.78 78.78 char78 78 78 -79 2017-10-01 2017-10-01T00:00 Beijing 79 79 true 79 79 79.79 79.79 char79 79 79 -8 2017-10-01 2017-10-01T00:00 Beijing 8 8 true 8 8 8.8 8.8 char8 8 8 -80 2017-10-01 2017-10-01T00:00 Beijing 80 80 true 80 80 80.8 80.8 char80 80 80 -81 2017-10-01 2017-10-01T00:00 Beijing 81 81 true 81 81 81.81 81.81 char81 81 81 -82 2017-10-01 2017-10-01T00:00 Beijing 82 82 true 82 82 82.82 82.82 char82 82 82 -83 2017-10-01 2017-10-01T00:00 Beijing 83 83 true 83 83 83.83 83.83 char83 83 83 -84 2017-10-01 2017-10-01T00:00 Beijing 84 84 true 84 84 84.84 84.84 char84 84 84 -85 2017-10-01 2017-10-01T00:00 Beijing 85 85 true 85 85 85.85 85.85 char85 85 85 -86 2017-10-01 2017-10-01T00:00 Beijing 86 86 true 86 86 86.86 86.86 char86 86 86 -87 2017-10-01 2017-10-01T00:00 Beijing 87 87 true 87 87 87.87 87.87 char87 87 87 -88 2017-10-01 2017-10-01T00:00 Beijing 88 88 true 88 88 88.88 88.88 char88 88 88 -89 2017-10-01 2017-10-01T00:00 Beijing 89 89 true 89 89 89.89 89.89 char89 89 89 -9 2017-10-01 2017-10-01T00:00 Beijing 9 9 true 9 9 9.9 9.9 char9 9 9 -90 2017-10-01 2017-10-01T00:00 Beijing 90 90 true 90 90 90.9 90.9 char90 90 90 -91 2017-10-01 2017-10-01T00:00 Beijing 91 91 true 91 91 91.91 91.91 char91 91 91 -92 2017-10-01 2017-10-01T00:00 Beijing 92 92 true 92 92 92.92 92.92 char92 92 92 -93 2017-10-01 2017-10-01T00:00 Beijing 93 93 true 93 93 93.93 93.93 char93 93 93 -94 2017-10-01 2017-10-01T00:00 Beijing 94 94 true 94 94 94.94 94.94 char94 94 94 -95 2017-10-01 2017-10-01T00:00 Beijing 95 95 true 95 95 95.95 95.95 char95 95 95 -96 2017-10-01 2017-10-01T00:00 Beijing 96 96 true 96 96 96.96 96.96 char96 96 96 -97 2017-10-01 2017-10-01T00:00 Beijing 97 97 true 97 97 97.97 97.97 char97 97 97 -98 2017-10-01 2017-10-01T00:00 Beijing 98 98 true 98 98 98.98 98.98 char98 98 98 -99 2017-10-01 2017-10-01T00:00 Beijing 99 99 true 99 99 99.99 99.99 char99 99 99 +11 2017-10-01 2017-10-01T00:00 Beijing 11 11 true 11 11 11.11 11.11 char11 11.000000000 11 +12 2017-10-01 2017-10-01T00:00 Beijing 12 12 true 12 12 12.12 12.12 char12 12.000000000 12 +13 2017-10-01 2017-10-01T00:00 Beijing 13 13 true 13 13 13.13 13.13 char13 13.000000000 13 +14 2017-10-01 2017-10-01T00:00 Beijing 14 14 true 14 14 14.14 14.14 char14 14.000000000 14 +15 2017-10-01 2017-10-01T00:00 Beijing 15 15 true 15 15 15.15 15.15 char15 15.000000000 15 +16 2017-10-01 2017-10-01T00:00 Beijing 16 16 true 16 16 16.16 16.16 char16 16.000000000 16 +17 2017-10-01 2017-10-01T00:00 Beijing 17 17 true 17 17 17.17 17.17 char17 17.000000000 17 +18 2017-10-01 2017-10-01T00:00 Beijing 18 18 true 18 18 18.18 18.18 char18 18.000000000 18 +19 2017-10-01 2017-10-01T00:00 Beijing 19 19 true 19 19 19.19 19.19 char19 19.000000000 19 +2 2017-10-01 2017-10-01T00:00 Beijing 2 2 true 2 2 2.2 2.2 char2 2.000000000 2 +20 2017-10-01 2017-10-01T00:00 Beijing 20 20 true 20 20 20.2 20.2 char20 20.000000000 20 +21 2017-10-01 2017-10-01T00:00 Beijing 21 21 true 21 21 21.21 21.21 char21 21.000000000 21 +22 2017-10-01 2017-10-01T00:00 Beijing 22 22 true 22 22 22.22 22.22 char22 22.000000000 22 +23 2017-10-01 2017-10-01T00:00 Beijing 23 23 true 23 23 23.23 23.23 char23 23.000000000 23 +24 2017-10-01 2017-10-01T00:00 Beijing 24 24 true 24 24 24.24 24.24 char24 24.000000000 24 +25 2017-10-01 2017-10-01T00:00 Beijing 25 25 true 25 25 25.25 25.25 char25 25.000000000 25 +26 2017-10-01 2017-10-01T00:00 Beijing 26 26 true 26 26 26.26 26.26 char26 26.000000000 26 +27 2017-10-01 2017-10-01T00:00 Beijing 27 27 true 27 27 27.27 27.27 char27 27.000000000 27 +28 2017-10-01 2017-10-01T00:00 Beijing 28 28 true 28 28 28.28 28.28 char28 28.000000000 28 +29 2017-10-01 2017-10-01T00:00 Beijing 29 29 true 29 29 29.29 29.29 char29 29.000000000 29 +3 2017-10-01 2017-10-01T00:00 Beijing 3 3 true 3 3 3.3 3.3 char3 3.000000000 3 +30 2017-10-01 2017-10-01T00:00 Beijing 30 30 true 30 30 30.3 30.3 char30 30.000000000 30 +31 2017-10-01 2017-10-01T00:00 Beijing 31 31 true 31 31 31.31 31.31 char31 31.000000000 31 +32 2017-10-01 2017-10-01T00:00 Beijing 32 32 true 32 32 32.32 32.32 char32 32.000000000 32 +33 2017-10-01 2017-10-01T00:00 Beijing 33 33 true 33 33 33.33 33.33 char33 33.000000000 33 +34 2017-10-01 2017-10-01T00:00 Beijing 34 34 true 34 34 34.34 34.34 char34 34.000000000 34 +35 2017-10-01 2017-10-01T00:00 Beijing 35 35 true 35 35 35.35 35.35 char35 35.000000000 35 +36 2017-10-01 2017-10-01T00:00 Beijing 36 36 true 36 36 36.36 36.36 char36 36.000000000 36 +37 2017-10-01 2017-10-01T00:00 Beijing 37 37 true 37 37 37.37 37.37 char37 37.000000000 37 +38 2017-10-01 2017-10-01T00:00 Beijing 38 38 true 38 38 38.38 38.38 char38 38.000000000 38 +39 2017-10-01 2017-10-01T00:00 Beijing 39 39 true 39 39 39.39 39.39 char39 39.000000000 39 +4 2017-10-01 2017-10-01T00:00 Beijing 4 4 true 4 4 4.4 4.4 char4 4.000000000 4 +40 2017-10-01 2017-10-01T00:00 Beijing 40 40 true 40 40 40.4 40.4 char40 40.000000000 40 +41 2017-10-01 2017-10-01T00:00 Beijing 41 41 true 41 41 41.41 41.41 char41 41.000000000 41 +42 2017-10-01 2017-10-01T00:00 Beijing 42 42 true 42 42 42.42 42.42 char42 42.000000000 42 +43 2017-10-01 2017-10-01T00:00 Beijing 43 43 true 43 43 43.43 43.43 char43 43.000000000 43 +44 2017-10-01 2017-10-01T00:00 Beijing 44 44 true 44 44 44.44 44.44 char44 44.000000000 44 +45 2017-10-01 2017-10-01T00:00 Beijing 45 45 true 45 45 45.45 45.45 char45 45.000000000 45 +46 2017-10-01 2017-10-01T00:00 Beijing 46 46 true 46 46 46.46 46.46 char46 46.000000000 46 +47 2017-10-01 2017-10-01T00:00 Beijing 47 47 true 47 47 47.47 47.47 char47 47.000000000 47 +48 2017-10-01 2017-10-01T00:00 Beijing 48 48 true 48 48 48.48 48.48 char48 48.000000000 48 +49 2017-10-01 2017-10-01T00:00 Beijing 49 49 true 49 49 49.49 49.49 char49 49.000000000 49 +5 2017-10-01 2017-10-01T00:00 Beijing 5 5 true 5 5 5.5 5.5 char5 5.000000000 5 +50 2017-10-01 2017-10-01T00:00 Beijing 50 50 true 50 50 50.5 50.5 char50 50.000000000 50 +51 2017-10-01 2017-10-01T00:00 Beijing 51 51 true 51 51 51.51 51.51 char51 51.000000000 51 +52 2017-10-01 2017-10-01T00:00 Beijing 52 52 true 52 52 52.52 52.52 char52 52.000000000 52 +53 2017-10-01 2017-10-01T00:00 Beijing 53 53 true 53 53 53.53 53.53 char53 53.000000000 53 +54 2017-10-01 2017-10-01T00:00 Beijing 54 54 true 54 54 54.54 54.54 char54 54.000000000 54 +55 2017-10-01 2017-10-01T00:00 Beijing 55 55 true 55 55 55.55 55.55 char55 55.000000000 55 +56 2017-10-01 2017-10-01T00:00 Beijing 56 56 true 56 56 56.56 56.56 char56 56.000000000 56 +57 2017-10-01 2017-10-01T00:00 Beijing 57 57 true 57 57 57.57 57.57 char57 57.000000000 57 +58 2017-10-01 2017-10-01T00:00 Beijing 58 58 true 58 58 58.58 58.58 char58 58.000000000 58 +59 2017-10-01 2017-10-01T00:00 Beijing 59 59 true 59 59 59.59 59.59 char59 59.000000000 59 +6 2017-10-01 2017-10-01T00:00 Beijing 6 6 true 6 6 6.6 6.6 char6 6.000000000 6 +60 2017-10-01 2017-10-01T00:00 Beijing 60 60 true 60 60 60.6 60.6 char60 60.000000000 60 +61 2017-10-01 2017-10-01T00:00 Beijing 61 61 true 61 61 61.61 61.61 char61 61.000000000 61 +62 2017-10-01 2017-10-01T00:00 Beijing 62 62 true 62 62 62.62 62.62 char62 62.000000000 62 +63 2017-10-01 2017-10-01T00:00 Beijing 63 63 true 63 63 63.63 63.63 char63 63.000000000 63 +64 2017-10-01 2017-10-01T00:00 Beijing 64 64 true 64 64 64.64 64.64 char64 64.000000000 64 +65 2017-10-01 2017-10-01T00:00 Beijing 65 65 true 65 65 65.65 65.65 char65 65.000000000 65 +66 2017-10-01 2017-10-01T00:00 Beijing 66 66 true 66 66 66.66 66.66 char66 66.000000000 66 +67 2017-10-01 2017-10-01T00:00 Beijing 67 67 true 67 67 67.67 67.67 char67 67.000000000 67 +68 2017-10-01 2017-10-01T00:00 Beijing 68 68 true 68 68 68.68 68.68 char68 68.000000000 68 +69 2017-10-01 2017-10-01T00:00 Beijing 69 69 true 69 69 69.69 69.69 char69 69.000000000 69 +7 2017-10-01 2017-10-01T00:00 Beijing 7 7 true 7 7 7.7 7.7 char7 7.000000000 7 +70 2017-10-01 2017-10-01T00:00 Beijing 70 70 true 70 70 70.7 70.7 char70 70.000000000 70 +71 2017-10-01 2017-10-01T00:00 Beijing 71 71 true 71 71 71.71 71.71 char71 71.000000000 71 +72 2017-10-01 2017-10-01T00:00 Beijing 72 72 true 72 72 72.72 72.72 char72 72.000000000 72 +73 2017-10-01 2017-10-01T00:00 Beijing 73 73 true 73 73 73.73 73.73 char73 73.000000000 73 +74 2017-10-01 2017-10-01T00:00 Beijing 74 74 true 74 74 74.74 74.74 char74 74.000000000 74 +75 2017-10-01 2017-10-01T00:00 Beijing 75 75 true 75 75 75.75 75.75 char75 75.000000000 75 +76 2017-10-01 2017-10-01T00:00 Beijing 76 76 true 76 76 76.76 76.76 char76 76.000000000 76 +77 2017-10-01 2017-10-01T00:00 Beijing 77 77 true 77 77 77.77 77.77 char77 77.000000000 77 +78 2017-10-01 2017-10-01T00:00 Beijing 78 78 true 78 78 78.78 78.78 char78 78.000000000 78 +79 2017-10-01 2017-10-01T00:00 Beijing 79 79 true 79 79 79.79 79.79 char79 79.000000000 79 +8 2017-10-01 2017-10-01T00:00 Beijing 8 8 true 8 8 8.8 8.8 char8 8.000000000 8 +80 2017-10-01 2017-10-01T00:00 Beijing 80 80 true 80 80 80.8 80.8 char80 80.000000000 80 +81 2017-10-01 2017-10-01T00:00 Beijing 81 81 true 81 81 81.81 81.81 char81 81.000000000 81 +82 2017-10-01 2017-10-01T00:00 Beijing 82 82 true 82 82 82.82 82.82 char82 82.000000000 82 +83 2017-10-01 2017-10-01T00:00 Beijing 83 83 true 83 83 83.83 83.83 char83 83.000000000 83 +84 2017-10-01 2017-10-01T00:00 Beijing 84 84 true 84 84 84.84 84.84 char84 84.000000000 84 +85 2017-10-01 2017-10-01T00:00 Beijing 85 85 true 85 85 85.85 85.85 char85 85.000000000 85 +86 2017-10-01 2017-10-01T00:00 Beijing 86 86 true 86 86 86.86 86.86 char86 86.000000000 86 +87 2017-10-01 2017-10-01T00:00 Beijing 87 87 true 87 87 87.87 87.87 char87 87.000000000 87 +88 2017-10-01 2017-10-01T00:00 Beijing 88 88 true 88 88 88.88 88.88 char88 88.000000000 88 +89 2017-10-01 2017-10-01T00:00 Beijing 89 89 true 89 89 89.89 89.89 char89 89.000000000 89 +9 2017-10-01 2017-10-01T00:00 Beijing 9 9 true 9 9 9.9 9.9 char9 9.000000000 9 +90 2017-10-01 2017-10-01T00:00 Beijing 90 90 true 90 90 90.9 90.9 char90 90.000000000 90 +91 2017-10-01 2017-10-01T00:00 Beijing 91 91 true 91 91 91.91 91.91 char91 91.000000000 91 +92 2017-10-01 2017-10-01T00:00 Beijing 92 92 true 92 92 92.92 92.92 char92 92.000000000 92 +93 2017-10-01 2017-10-01T00:00 Beijing 93 93 true 93 93 93.93 93.93 char93 93.000000000 93 +94 2017-10-01 2017-10-01T00:00 Beijing 94 94 true 94 94 94.94 94.94 char94 94.000000000 94 +95 2017-10-01 2017-10-01T00:00 Beijing 95 95 true 95 95 95.95 95.95 char95 95.000000000 95 +96 2017-10-01 2017-10-01T00:00 Beijing 96 96 true 96 96 96.96 96.96 char96 96.000000000 96 +97 2017-10-01 2017-10-01T00:00 Beijing 97 97 true 97 97 97.97 97.97 char97 97.000000000 97 +98 2017-10-01 2017-10-01T00:00 Beijing 98 98 true 98 98 98.98 98.98 char98 98.000000000 98 +99 2017-10-01 2017-10-01T00:00 Beijing 99 99 true 99 99 99.99 99.99 char99 99.000000000 99 -- !select_load1 -- -20 2017-10-01 2017-10-01T00:00 Beijing 20 20 true 20 20 20.2 20.2 char20 20 20 -21 2017-10-01 2017-10-01T00:00 Beijing 21 21 true 21 21 21.21 21.21 char21 21 21 -22 2017-10-01 2017-10-01T00:00 Beijing 22 22 true 22 22 22.22 22.22 char22 22 22 -23 2017-10-01 2017-10-01T00:00 Beijing 23 23 true 23 23 23.23 23.23 char23 23 23 -24 2017-10-01 2017-10-01T00:00 Beijing 24 24 true 24 24 24.24 24.24 char24 24 24 -25 2017-10-01 2017-10-01T00:00 Beijing 25 25 true 25 25 25.25 25.25 char25 25 25 -26 2017-10-01 2017-10-01T00:00 Beijing 26 26 true 26 26 26.26 26.26 char26 26 26 -27 2017-10-01 2017-10-01T00:00 Beijing 27 27 true 27 27 27.27 27.27 char27 27 27 -28 2017-10-01 2017-10-01T00:00 Beijing 28 28 true 28 28 28.28 28.28 char28 28 28 -29 2017-10-01 2017-10-01T00:00 Beijing 29 29 true 29 29 29.29 29.29 char29 29 29 -30 2017-10-01 2017-10-01T00:00 Beijing 30 30 true 30 30 30.3 30.3 char30 30 30 -31 2017-10-01 2017-10-01T00:00 Beijing 31 31 true 31 31 31.31 31.31 char31 31 31 -32 2017-10-01 2017-10-01T00:00 Beijing 32 32 true 32 32 32.32 32.32 char32 32 32 -33 2017-10-01 2017-10-01T00:00 Beijing 33 33 true 33 33 33.33 33.33 char33 33 33 -34 2017-10-01 2017-10-01T00:00 Beijing 34 34 true 34 34 34.34 34.34 char34 34 34 -35 2017-10-01 2017-10-01T00:00 Beijing 35 35 true 35 35 35.35 35.35 char35 35 35 -36 2017-10-01 2017-10-01T00:00 Beijing 36 36 true 36 36 36.36 36.36 char36 36 36 -37 2017-10-01 2017-10-01T00:00 Beijing 37 37 true 37 37 37.37 37.37 char37 37 37 -38 2017-10-01 2017-10-01T00:00 Beijing 38 38 true 38 38 38.38 38.38 char38 38 38 -39 2017-10-01 2017-10-01T00:00 Beijing 39 39 true 39 39 39.39 39.39 char39 39 39 -40 2017-10-01 2017-10-01T00:00 Beijing 40 40 true 40 40 40.4 40.4 char40 40 40 -41 2017-10-01 2017-10-01T00:00 Beijing 41 41 true 41 41 41.41 41.41 char41 41 41 -42 2017-10-01 2017-10-01T00:00 Beijing 42 42 true 42 42 42.42 42.42 char42 42 42 -43 2017-10-01 2017-10-01T00:00 Beijing 43 43 true 43 43 43.43 43.43 char43 43 43 -44 2017-10-01 2017-10-01T00:00 Beijing 44 44 true 44 44 44.44 44.44 char44 44 44 -45 2017-10-01 2017-10-01T00:00 Beijing 45 45 true 45 45 45.45 45.45 char45 45 45 -46 2017-10-01 2017-10-01T00:00 Beijing 46 46 true 46 46 46.46 46.46 char46 46 46 -47 2017-10-01 2017-10-01T00:00 Beijing 47 47 true 47 47 47.47 47.47 char47 47 47 -48 2017-10-01 2017-10-01T00:00 Beijing 48 48 true 48 48 48.48 48.48 char48 48 48 -49 2017-10-01 2017-10-01T00:00 Beijing 49 49 true 49 49 49.49 49.49 char49 49 49 -50 2017-10-01 2017-10-01T00:00 Beijing 50 50 true 50 50 50.5 50.5 char50 50 50 -51 2017-10-01 2017-10-01T00:00 Beijing 51 51 true 51 51 51.51 51.51 char51 51 51 -52 2017-10-01 2017-10-01T00:00 Beijing 52 52 true 52 52 52.52 52.52 char52 52 52 -53 2017-10-01 2017-10-01T00:00 Beijing 53 53 true 53 53 53.53 53.53 char53 53 53 -54 2017-10-01 2017-10-01T00:00 Beijing 54 54 true 54 54 54.54 54.54 char54 54 54 -55 2017-10-01 2017-10-01T00:00 Beijing 55 55 true 55 55 55.55 55.55 char55 55 55 -56 2017-10-01 2017-10-01T00:00 Beijing 56 56 true 56 56 56.56 56.56 char56 56 56 -57 2017-10-01 2017-10-01T00:00 Beijing 57 57 true 57 57 57.57 57.57 char57 57 57 -58 2017-10-01 2017-10-01T00:00 Beijing 58 58 true 58 58 58.58 58.58 char58 58 58 -59 2017-10-01 2017-10-01T00:00 Beijing 59 59 true 59 59 59.59 59.59 char59 59 59 -60 2017-10-01 2017-10-01T00:00 Beijing 60 60 true 60 60 60.6 60.6 char60 60 60 -61 2017-10-01 2017-10-01T00:00 Beijing 61 61 true 61 61 61.61 61.61 char61 61 61 -62 2017-10-01 2017-10-01T00:00 Beijing 62 62 true 62 62 62.62 62.62 char62 62 62 -63 2017-10-01 2017-10-01T00:00 Beijing 63 63 true 63 63 63.63 63.63 char63 63 63 -64 2017-10-01 2017-10-01T00:00 Beijing 64 64 true 64 64 64.64 64.64 char64 64 64 -65 2017-10-01 2017-10-01T00:00 Beijing 65 65 true 65 65 65.65 65.65 char65 65 65 -66 2017-10-01 2017-10-01T00:00 Beijing 66 66 true 66 66 66.66 66.66 char66 66 66 -67 2017-10-01 2017-10-01T00:00 Beijing 67 67 true 67 67 67.67 67.67 char67 67 67 -68 2017-10-01 2017-10-01T00:00 Beijing 68 68 true 68 68 68.68 68.68 char68 68 68 -69 2017-10-01 2017-10-01T00:00 Beijing 69 69 true 69 69 69.69 69.69 char69 69 69 +20 2017-10-01 2017-10-01T00:00 Beijing 20 20 true 20 20 20.2 20.2 char20 20.000000000 20 +21 2017-10-01 2017-10-01T00:00 Beijing 21 21 true 21 21 21.21 21.21 char21 21.000000000 21 +22 2017-10-01 2017-10-01T00:00 Beijing 22 22 true 22 22 22.22 22.22 char22 22.000000000 22 +23 2017-10-01 2017-10-01T00:00 Beijing 23 23 true 23 23 23.23 23.23 char23 23.000000000 23 +24 2017-10-01 2017-10-01T00:00 Beijing 24 24 true 24 24 24.24 24.24 char24 24.000000000 24 +25 2017-10-01 2017-10-01T00:00 Beijing 25 25 true 25 25 25.25 25.25 char25 25.000000000 25 +26 2017-10-01 2017-10-01T00:00 Beijing 26 26 true 26 26 26.26 26.26 char26 26.000000000 26 +27 2017-10-01 2017-10-01T00:00 Beijing 27 27 true 27 27 27.27 27.27 char27 27.000000000 27 +28 2017-10-01 2017-10-01T00:00 Beijing 28 28 true 28 28 28.28 28.28 char28 28.000000000 28 +29 2017-10-01 2017-10-01T00:00 Beijing 29 29 true 29 29 29.29 29.29 char29 29.000000000 29 +30 2017-10-01 2017-10-01T00:00 Beijing 30 30 true 30 30 30.3 30.3 char30 30.000000000 30 +31 2017-10-01 2017-10-01T00:00 Beijing 31 31 true 31 31 31.31 31.31 char31 31.000000000 31 +32 2017-10-01 2017-10-01T00:00 Beijing 32 32 true 32 32 32.32 32.32 char32 32.000000000 32 +33 2017-10-01 2017-10-01T00:00 Beijing 33 33 true 33 33 33.33 33.33 char33 33.000000000 33 +34 2017-10-01 2017-10-01T00:00 Beijing 34 34 true 34 34 34.34 34.34 char34 34.000000000 34 +35 2017-10-01 2017-10-01T00:00 Beijing 35 35 true 35 35 35.35 35.35 char35 35.000000000 35 +36 2017-10-01 2017-10-01T00:00 Beijing 36 36 true 36 36 36.36 36.36 char36 36.000000000 36 +37 2017-10-01 2017-10-01T00:00 Beijing 37 37 true 37 37 37.37 37.37 char37 37.000000000 37 +38 2017-10-01 2017-10-01T00:00 Beijing 38 38 true 38 38 38.38 38.38 char38 38.000000000 38 +39 2017-10-01 2017-10-01T00:00 Beijing 39 39 true 39 39 39.39 39.39 char39 39.000000000 39 +40 2017-10-01 2017-10-01T00:00 Beijing 40 40 true 40 40 40.4 40.4 char40 40.000000000 40 +41 2017-10-01 2017-10-01T00:00 Beijing 41 41 true 41 41 41.41 41.41 char41 41.000000000 41 +42 2017-10-01 2017-10-01T00:00 Beijing 42 42 true 42 42 42.42 42.42 char42 42.000000000 42 +43 2017-10-01 2017-10-01T00:00 Beijing 43 43 true 43 43 43.43 43.43 char43 43.000000000 43 +44 2017-10-01 2017-10-01T00:00 Beijing 44 44 true 44 44 44.44 44.44 char44 44.000000000 44 +45 2017-10-01 2017-10-01T00:00 Beijing 45 45 true 45 45 45.45 45.45 char45 45.000000000 45 +46 2017-10-01 2017-10-01T00:00 Beijing 46 46 true 46 46 46.46 46.46 char46 46.000000000 46 +47 2017-10-01 2017-10-01T00:00 Beijing 47 47 true 47 47 47.47 47.47 char47 47.000000000 47 +48 2017-10-01 2017-10-01T00:00 Beijing 48 48 true 48 48 48.48 48.48 char48 48.000000000 48 +49 2017-10-01 2017-10-01T00:00 Beijing 49 49 true 49 49 49.49 49.49 char49 49.000000000 49 +50 2017-10-01 2017-10-01T00:00 Beijing 50 50 true 50 50 50.5 50.5 char50 50.000000000 50 +51 2017-10-01 2017-10-01T00:00 Beijing 51 51 true 51 51 51.51 51.51 char51 51.000000000 51 +52 2017-10-01 2017-10-01T00:00 Beijing 52 52 true 52 52 52.52 52.52 char52 52.000000000 52 +53 2017-10-01 2017-10-01T00:00 Beijing 53 53 true 53 53 53.53 53.53 char53 53.000000000 53 +54 2017-10-01 2017-10-01T00:00 Beijing 54 54 true 54 54 54.54 54.54 char54 54.000000000 54 +55 2017-10-01 2017-10-01T00:00 Beijing 55 55 true 55 55 55.55 55.55 char55 55.000000000 55 +56 2017-10-01 2017-10-01T00:00 Beijing 56 56 true 56 56 56.56 56.56 char56 56.000000000 56 +57 2017-10-01 2017-10-01T00:00 Beijing 57 57 true 57 57 57.57 57.57 char57 57.000000000 57 +58 2017-10-01 2017-10-01T00:00 Beijing 58 58 true 58 58 58.58 58.58 char58 58.000000000 58 +59 2017-10-01 2017-10-01T00:00 Beijing 59 59 true 59 59 59.59 59.59 char59 59.000000000 59 +60 2017-10-01 2017-10-01T00:00 Beijing 60 60 true 60 60 60.6 60.6 char60 60.000000000 60 +61 2017-10-01 2017-10-01T00:00 Beijing 61 61 true 61 61 61.61 61.61 char61 61.000000000 61 +62 2017-10-01 2017-10-01T00:00 Beijing 62 62 true 62 62 62.62 62.62 char62 62.000000000 62 +63 2017-10-01 2017-10-01T00:00 Beijing 63 63 true 63 63 63.63 63.63 char63 63.000000000 63 +64 2017-10-01 2017-10-01T00:00 Beijing 64 64 true 64 64 64.64 64.64 char64 64.000000000 64 +65 2017-10-01 2017-10-01T00:00 Beijing 65 65 true 65 65 65.65 65.65 char65 65.000000000 65 +66 2017-10-01 2017-10-01T00:00 Beijing 66 66 true 66 66 66.66 66.66 char66 66.000000000 66 +67 2017-10-01 2017-10-01T00:00 Beijing 67 67 true 67 67 67.67 67.67 char67 67.000000000 67 +68 2017-10-01 2017-10-01T00:00 Beijing 68 68 true 68 68 68.68 68.68 char68 68.000000000 68 +69 2017-10-01 2017-10-01T00:00 Beijing 69 69 true 69 69 69.69 69.69 char69 69.000000000 69 diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out index c8ff8cafdd9854..cd7fe1e40fdb2d 100644 --- a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out +++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out @@ -127,3 +127,15 @@ 9 doris_9 {"user_id":9, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":9, "sex":9, "bool_col":1, "int_col":9, "bigint_col":9, "largeint_col":"9", "float_col":9.9, "double_col":9.9, "char_col":"char9_1234", "decimal_col":9.000000000} 10 doris_10 {"user_id":10, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":null, "age":null, "sex":null, "bool_col":null, "int_col":null, "bigint_col":null, "largeint_col":null, "float_col":null, "double_col":null, "char_col":null, "decimal_col":null} +-- !select_load7 -- +1 doris_1 {"user_id":1, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":1, "sex":1, "bool_col":1, "int_col":1, "bigint_col":1, "largeint_col":"1", "float_col":1.1, "double_col":1.1, "char_col":"char1_1234", "decimal_col":1.000000000} +2 doris_2 {"user_id":2, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":2, "sex":2, "bool_col":1, "int_col":2, "bigint_col":2, "largeint_col":"2", "float_col":2.2, "double_col":2.2, "char_col":"char2_1234", "decimal_col":2.000000000} +3 doris_3 {"user_id":3, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":3, "sex":3, "bool_col":1, "int_col":3, "bigint_col":3, "largeint_col":"3", "float_col":3.3, "double_col":3.3, "char_col":"char3_1234", "decimal_col":3.000000000} +4 doris_4 {"user_id":4, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":4, "sex":4, "bool_col":1, "int_col":4, "bigint_col":4, "largeint_col":"4", "float_col":4.4, "double_col":4.4, "char_col":"char4_1234", "decimal_col":4.000000000} +5 doris_5 {"user_id":5, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":5, "sex":5, "bool_col":1, "int_col":5, "bigint_col":5, "largeint_col":"5", "float_col":5.5, "double_col":5.5, "char_col":"char5_1234", "decimal_col":5.000000000} +6 doris_6 {"user_id":6, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":6, "sex":6, "bool_col":1, "int_col":6, "bigint_col":6, "largeint_col":"6", "float_col":6.6, "double_col":6.6, "char_col":"char6_1234", "decimal_col":6.000000000} +7 doris_7 {"user_id":7, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":7, "sex":7, "bool_col":1, "int_col":7, "bigint_col":7, "largeint_col":"7", "float_col":7.7, "double_col":7.7, "char_col":"char7_1234", "decimal_col":7.000000000} +8 doris_8 {"user_id":8, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":8, "sex":8, "bool_col":1, "int_col":8, "bigint_col":8, "largeint_col":"8", "float_col":8.8, "double_col":8.800000000000001, "char_col":"char8_1234", "decimal_col":8.000000000} +9 doris_9 {"user_id":9, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":9, "sex":9, "bool_col":1, "int_col":9, "bigint_col":9, "largeint_col":"9", "float_col":9.9, "double_col":9.9, "char_col":"char9_1234", "decimal_col":9.000000000} +10 doris_10 {"user_id":10, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":null, "age":null, "sex":null, "bool_col":null, "int_col":null, "bigint_col":null, "largeint_col":null, "float_col":null, "double_col":null, "char_col":null, "decimal_col":null} + diff --git a/regression-test/data/export_p0/test_export_parquet.out b/regression-test/data/export_p0/test_export_parquet.out index c3358efa4a97af..941dd4469a66c8 100644 --- a/regression-test/data/export_p0/test_export_parquet.out +++ b/regression-test/data/export_p0/test_export_parquet.out @@ -102,104 +102,104 @@ 99 2017-10-01 2017-10-01T00:00 Beijing 99 99 true 99 99 99 99.99 99.99 char99 99 0.0.0.99 ::99 -- !select_load1 -- -1 2017-10-01 2017-10-01T00:00 Beijing 1 1 true 1 1 1.1 1.1 char1 1 1 1 ::1 -10 2017-10-01 2017-10-01T00:00 Beijing 10 10 true 10 10 10.1 10.1 char10 10 10 10 ::10 +1 2017-10-01 2017-10-01T00:00 Beijing 1 1 true 1 1 1.1 1.1 char1 1.000000000 1 1 ::1 +10 2017-10-01 2017-10-01T00:00 Beijing 10 10 true 10 10 10.1 10.1 char10 10.000000000 10 10 ::10 100 2017-10-01 2017-10-01T00:00 \N \N \N \N \N \N \N \N \N \N \N \N \N -11 2017-10-01 2017-10-01T00:00 Beijing 11 11 true 11 11 11.11 11.11 char11 11 11 11 ::11 -12 2017-10-01 2017-10-01T00:00 Beijing 12 12 true 12 12 12.12 12.12 char12 12 12 12 ::12 -13 2017-10-01 2017-10-01T00:00 Beijing 13 13 true 13 13 13.13 13.13 char13 13 13 13 ::13 -14 2017-10-01 2017-10-01T00:00 Beijing 14 14 true 14 14 14.14 14.14 char14 14 14 14 ::14 -15 2017-10-01 2017-10-01T00:00 Beijing 15 15 true 15 15 15.15 15.15 char15 15 15 15 ::15 -16 2017-10-01 2017-10-01T00:00 Beijing 16 16 true 16 16 16.16 16.16 char16 16 16 16 ::16 -17 2017-10-01 2017-10-01T00:00 Beijing 17 17 true 17 17 17.17 17.17 char17 17 17 17 ::17 -18 2017-10-01 2017-10-01T00:00 Beijing 18 18 true 18 18 18.18 18.18 char18 18 18 18 ::18 -19 2017-10-01 2017-10-01T00:00 Beijing 19 19 true 19 19 19.19 19.19 char19 19 19 19 ::19 -2 2017-10-01 2017-10-01T00:00 Beijing 2 2 true 2 2 2.2 2.2 char2 2 2 2 ::2 -20 2017-10-01 2017-10-01T00:00 Beijing 20 20 true 20 20 20.2 20.2 char20 20 20 20 ::20 -21 2017-10-01 2017-10-01T00:00 Beijing 21 21 true 21 21 21.21 21.21 char21 21 21 21 ::21 -22 2017-10-01 2017-10-01T00:00 Beijing 22 22 true 22 22 22.22 22.22 char22 22 22 22 ::22 -23 2017-10-01 2017-10-01T00:00 Beijing 23 23 true 23 23 23.23 23.23 char23 23 23 23 ::23 -24 2017-10-01 2017-10-01T00:00 Beijing 24 24 true 24 24 24.24 24.24 char24 24 24 24 ::24 -25 2017-10-01 2017-10-01T00:00 Beijing 25 25 true 25 25 25.25 25.25 char25 25 25 25 ::25 -26 2017-10-01 2017-10-01T00:00 Beijing 26 26 true 26 26 26.26 26.26 char26 26 26 26 ::26 -27 2017-10-01 2017-10-01T00:00 Beijing 27 27 true 27 27 27.27 27.27 char27 27 27 27 ::27 -28 2017-10-01 2017-10-01T00:00 Beijing 28 28 true 28 28 28.28 28.28 char28 28 28 28 ::28 -29 2017-10-01 2017-10-01T00:00 Beijing 29 29 true 29 29 29.29 29.29 char29 29 29 29 ::29 -3 2017-10-01 2017-10-01T00:00 Beijing 3 3 true 3 3 3.3 3.3 char3 3 3 3 ::3 -30 2017-10-01 2017-10-01T00:00 Beijing 30 30 true 30 30 30.3 30.3 char30 30 30 30 ::30 -31 2017-10-01 2017-10-01T00:00 Beijing 31 31 true 31 31 31.31 31.31 char31 31 31 31 ::31 -32 2017-10-01 2017-10-01T00:00 Beijing 32 32 true 32 32 32.32 32.32 char32 32 32 32 ::32 -33 2017-10-01 2017-10-01T00:00 Beijing 33 33 true 33 33 33.33 33.33 char33 33 33 33 ::33 -34 2017-10-01 2017-10-01T00:00 Beijing 34 34 true 34 34 34.34 34.34 char34 34 34 34 ::34 -35 2017-10-01 2017-10-01T00:00 Beijing 35 35 true 35 35 35.35 35.35 char35 35 35 35 ::35 -36 2017-10-01 2017-10-01T00:00 Beijing 36 36 true 36 36 36.36 36.36 char36 36 36 36 ::36 -37 2017-10-01 2017-10-01T00:00 Beijing 37 37 true 37 37 37.37 37.37 char37 37 37 37 ::37 -38 2017-10-01 2017-10-01T00:00 Beijing 38 38 true 38 38 38.38 38.38 char38 38 38 38 ::38 -39 2017-10-01 2017-10-01T00:00 Beijing 39 39 true 39 39 39.39 39.39 char39 39 39 39 ::39 -4 2017-10-01 2017-10-01T00:00 Beijing 4 4 true 4 4 4.4 4.4 char4 4 4 4 ::4 -40 2017-10-01 2017-10-01T00:00 Beijing 40 40 true 40 40 40.4 40.4 char40 40 40 40 ::40 -41 2017-10-01 2017-10-01T00:00 Beijing 41 41 true 41 41 41.41 41.41 char41 41 41 41 ::41 -42 2017-10-01 2017-10-01T00:00 Beijing 42 42 true 42 42 42.42 42.42 char42 42 42 42 ::42 -43 2017-10-01 2017-10-01T00:00 Beijing 43 43 true 43 43 43.43 43.43 char43 43 43 43 ::43 -44 2017-10-01 2017-10-01T00:00 Beijing 44 44 true 44 44 44.44 44.44 char44 44 44 44 ::44 -45 2017-10-01 2017-10-01T00:00 Beijing 45 45 true 45 45 45.45 45.45 char45 45 45 45 ::45 -46 2017-10-01 2017-10-01T00:00 Beijing 46 46 true 46 46 46.46 46.46 char46 46 46 46 ::46 -47 2017-10-01 2017-10-01T00:00 Beijing 47 47 true 47 47 47.47 47.47 char47 47 47 47 ::47 -48 2017-10-01 2017-10-01T00:00 Beijing 48 48 true 48 48 48.48 48.48 char48 48 48 48 ::48 -49 2017-10-01 2017-10-01T00:00 Beijing 49 49 true 49 49 49.49 49.49 char49 49 49 49 ::49 -5 2017-10-01 2017-10-01T00:00 Beijing 5 5 true 5 5 5.5 5.5 char5 5 5 5 ::5 -50 2017-10-01 2017-10-01T00:00 Beijing 50 50 true 50 50 50.5 50.5 char50 50 50 50 ::50 -51 2017-10-01 2017-10-01T00:00 Beijing 51 51 true 51 51 51.51 51.51 char51 51 51 51 ::51 -52 2017-10-01 2017-10-01T00:00 Beijing 52 52 true 52 52 52.52 52.52 char52 52 52 52 ::52 -53 2017-10-01 2017-10-01T00:00 Beijing 53 53 true 53 53 53.53 53.53 char53 53 53 53 ::53 -54 2017-10-01 2017-10-01T00:00 Beijing 54 54 true 54 54 54.54 54.54 char54 54 54 54 ::54 -55 2017-10-01 2017-10-01T00:00 Beijing 55 55 true 55 55 55.55 55.55 char55 55 55 55 ::55 -56 2017-10-01 2017-10-01T00:00 Beijing 56 56 true 56 56 56.56 56.56 char56 56 56 56 ::56 -57 2017-10-01 2017-10-01T00:00 Beijing 57 57 true 57 57 57.57 57.57 char57 57 57 57 ::57 -58 2017-10-01 2017-10-01T00:00 Beijing 58 58 true 58 58 58.58 58.58 char58 58 58 58 ::58 -59 2017-10-01 2017-10-01T00:00 Beijing 59 59 true 59 59 59.59 59.59 char59 59 59 59 ::59 -6 2017-10-01 2017-10-01T00:00 Beijing 6 6 true 6 6 6.6 6.6 char6 6 6 6 ::6 -60 2017-10-01 2017-10-01T00:00 Beijing 60 60 true 60 60 60.6 60.6 char60 60 60 60 ::60 -61 2017-10-01 2017-10-01T00:00 Beijing 61 61 true 61 61 61.61 61.61 char61 61 61 61 ::61 -62 2017-10-01 2017-10-01T00:00 Beijing 62 62 true 62 62 62.62 62.62 char62 62 62 62 ::62 -63 2017-10-01 2017-10-01T00:00 Beijing 63 63 true 63 63 63.63 63.63 char63 63 63 63 ::63 -64 2017-10-01 2017-10-01T00:00 Beijing 64 64 true 64 64 64.64 64.64 char64 64 64 64 ::64 -65 2017-10-01 2017-10-01T00:00 Beijing 65 65 true 65 65 65.65 65.65 char65 65 65 65 ::65 -66 2017-10-01 2017-10-01T00:00 Beijing 66 66 true 66 66 66.66 66.66 char66 66 66 66 ::66 -67 2017-10-01 2017-10-01T00:00 Beijing 67 67 true 67 67 67.67 67.67 char67 67 67 67 ::67 -68 2017-10-01 2017-10-01T00:00 Beijing 68 68 true 68 68 68.68 68.68 char68 68 68 68 ::68 -69 2017-10-01 2017-10-01T00:00 Beijing 69 69 true 69 69 69.69 69.69 char69 69 69 69 ::69 -7 2017-10-01 2017-10-01T00:00 Beijing 7 7 true 7 7 7.7 7.7 char7 7 7 7 ::7 -70 2017-10-01 2017-10-01T00:00 Beijing 70 70 true 70 70 70.7 70.7 char70 70 70 70 ::70 -71 2017-10-01 2017-10-01T00:00 Beijing 71 71 true 71 71 71.71 71.71 char71 71 71 71 ::71 -72 2017-10-01 2017-10-01T00:00 Beijing 72 72 true 72 72 72.72 72.72 char72 72 72 72 ::72 -73 2017-10-01 2017-10-01T00:00 Beijing 73 73 true 73 73 73.73 73.73 char73 73 73 73 ::73 -74 2017-10-01 2017-10-01T00:00 Beijing 74 74 true 74 74 74.74 74.74 char74 74 74 74 ::74 -75 2017-10-01 2017-10-01T00:00 Beijing 75 75 true 75 75 75.75 75.75 char75 75 75 75 ::75 -76 2017-10-01 2017-10-01T00:00 Beijing 76 76 true 76 76 76.76 76.76 char76 76 76 76 ::76 -77 2017-10-01 2017-10-01T00:00 Beijing 77 77 true 77 77 77.77 77.77 char77 77 77 77 ::77 -78 2017-10-01 2017-10-01T00:00 Beijing 78 78 true 78 78 78.78 78.78 char78 78 78 78 ::78 -79 2017-10-01 2017-10-01T00:00 Beijing 79 79 true 79 79 79.79 79.79 char79 79 79 79 ::79 -8 2017-10-01 2017-10-01T00:00 Beijing 8 8 true 8 8 8.8 8.8 char8 8 8 8 ::8 -80 2017-10-01 2017-10-01T00:00 Beijing 80 80 true 80 80 80.8 80.8 char80 80 80 80 ::80 -81 2017-10-01 2017-10-01T00:00 Beijing 81 81 true 81 81 81.81 81.81 char81 81 81 81 ::81 -82 2017-10-01 2017-10-01T00:00 Beijing 82 82 true 82 82 82.82 82.82 char82 82 82 82 ::82 -83 2017-10-01 2017-10-01T00:00 Beijing 83 83 true 83 83 83.83 83.83 char83 83 83 83 ::83 -84 2017-10-01 2017-10-01T00:00 Beijing 84 84 true 84 84 84.84 84.84 char84 84 84 84 ::84 -85 2017-10-01 2017-10-01T00:00 Beijing 85 85 true 85 85 85.85 85.85 char85 85 85 85 ::85 -86 2017-10-01 2017-10-01T00:00 Beijing 86 86 true 86 86 86.86 86.86 char86 86 86 86 ::86 -87 2017-10-01 2017-10-01T00:00 Beijing 87 87 true 87 87 87.87 87.87 char87 87 87 87 ::87 -88 2017-10-01 2017-10-01T00:00 Beijing 88 88 true 88 88 88.88 88.88 char88 88 88 88 ::88 -89 2017-10-01 2017-10-01T00:00 Beijing 89 89 true 89 89 89.89 89.89 char89 89 89 89 ::89 -9 2017-10-01 2017-10-01T00:00 Beijing 9 9 true 9 9 9.9 9.9 char9 9 9 9 ::9 -90 2017-10-01 2017-10-01T00:00 Beijing 90 90 true 90 90 90.9 90.9 char90 90 90 90 ::90 -91 2017-10-01 2017-10-01T00:00 Beijing 91 91 true 91 91 91.91 91.91 char91 91 91 91 ::91 -92 2017-10-01 2017-10-01T00:00 Beijing 92 92 true 92 92 92.92 92.92 char92 92 92 92 ::92 -93 2017-10-01 2017-10-01T00:00 Beijing 93 93 true 93 93 93.93 93.93 char93 93 93 93 ::93 -94 2017-10-01 2017-10-01T00:00 Beijing 94 94 true 94 94 94.94 94.94 char94 94 94 94 ::94 -95 2017-10-01 2017-10-01T00:00 Beijing 95 95 true 95 95 95.95 95.95 char95 95 95 95 ::95 -96 2017-10-01 2017-10-01T00:00 Beijing 96 96 true 96 96 96.96 96.96 char96 96 96 96 ::96 -97 2017-10-01 2017-10-01T00:00 Beijing 97 97 true 97 97 97.97 97.97 char97 97 97 97 ::97 -98 2017-10-01 2017-10-01T00:00 Beijing 98 98 true 98 98 98.98 98.98 char98 98 98 98 ::98 -99 2017-10-01 2017-10-01T00:00 Beijing 99 99 true 99 99 99.99 99.99 char99 99 99 99 ::99 +11 2017-10-01 2017-10-01T00:00 Beijing 11 11 true 11 11 11.11 11.11 char11 11.000000000 11 11 ::11 +12 2017-10-01 2017-10-01T00:00 Beijing 12 12 true 12 12 12.12 12.12 char12 12.000000000 12 12 ::12 +13 2017-10-01 2017-10-01T00:00 Beijing 13 13 true 13 13 13.13 13.13 char13 13.000000000 13 13 ::13 +14 2017-10-01 2017-10-01T00:00 Beijing 14 14 true 14 14 14.14 14.14 char14 14.000000000 14 14 ::14 +15 2017-10-01 2017-10-01T00:00 Beijing 15 15 true 15 15 15.15 15.15 char15 15.000000000 15 15 ::15 +16 2017-10-01 2017-10-01T00:00 Beijing 16 16 true 16 16 16.16 16.16 char16 16.000000000 16 16 ::16 +17 2017-10-01 2017-10-01T00:00 Beijing 17 17 true 17 17 17.17 17.17 char17 17.000000000 17 17 ::17 +18 2017-10-01 2017-10-01T00:00 Beijing 18 18 true 18 18 18.18 18.18 char18 18.000000000 18 18 ::18 +19 2017-10-01 2017-10-01T00:00 Beijing 19 19 true 19 19 19.19 19.19 char19 19.000000000 19 19 ::19 +2 2017-10-01 2017-10-01T00:00 Beijing 2 2 true 2 2 2.2 2.2 char2 2.000000000 2 2 ::2 +20 2017-10-01 2017-10-01T00:00 Beijing 20 20 true 20 20 20.2 20.2 char20 20.000000000 20 20 ::20 +21 2017-10-01 2017-10-01T00:00 Beijing 21 21 true 21 21 21.21 21.21 char21 21.000000000 21 21 ::21 +22 2017-10-01 2017-10-01T00:00 Beijing 22 22 true 22 22 22.22 22.22 char22 22.000000000 22 22 ::22 +23 2017-10-01 2017-10-01T00:00 Beijing 23 23 true 23 23 23.23 23.23 char23 23.000000000 23 23 ::23 +24 2017-10-01 2017-10-01T00:00 Beijing 24 24 true 24 24 24.24 24.24 char24 24.000000000 24 24 ::24 +25 2017-10-01 2017-10-01T00:00 Beijing 25 25 true 25 25 25.25 25.25 char25 25.000000000 25 25 ::25 +26 2017-10-01 2017-10-01T00:00 Beijing 26 26 true 26 26 26.26 26.26 char26 26.000000000 26 26 ::26 +27 2017-10-01 2017-10-01T00:00 Beijing 27 27 true 27 27 27.27 27.27 char27 27.000000000 27 27 ::27 +28 2017-10-01 2017-10-01T00:00 Beijing 28 28 true 28 28 28.28 28.28 char28 28.000000000 28 28 ::28 +29 2017-10-01 2017-10-01T00:00 Beijing 29 29 true 29 29 29.29 29.29 char29 29.000000000 29 29 ::29 +3 2017-10-01 2017-10-01T00:00 Beijing 3 3 true 3 3 3.3 3.3 char3 3.000000000 3 3 ::3 +30 2017-10-01 2017-10-01T00:00 Beijing 30 30 true 30 30 30.3 30.3 char30 30.000000000 30 30 ::30 +31 2017-10-01 2017-10-01T00:00 Beijing 31 31 true 31 31 31.31 31.31 char31 31.000000000 31 31 ::31 +32 2017-10-01 2017-10-01T00:00 Beijing 32 32 true 32 32 32.32 32.32 char32 32.000000000 32 32 ::32 +33 2017-10-01 2017-10-01T00:00 Beijing 33 33 true 33 33 33.33 33.33 char33 33.000000000 33 33 ::33 +34 2017-10-01 2017-10-01T00:00 Beijing 34 34 true 34 34 34.34 34.34 char34 34.000000000 34 34 ::34 +35 2017-10-01 2017-10-01T00:00 Beijing 35 35 true 35 35 35.35 35.35 char35 35.000000000 35 35 ::35 +36 2017-10-01 2017-10-01T00:00 Beijing 36 36 true 36 36 36.36 36.36 char36 36.000000000 36 36 ::36 +37 2017-10-01 2017-10-01T00:00 Beijing 37 37 true 37 37 37.37 37.37 char37 37.000000000 37 37 ::37 +38 2017-10-01 2017-10-01T00:00 Beijing 38 38 true 38 38 38.38 38.38 char38 38.000000000 38 38 ::38 +39 2017-10-01 2017-10-01T00:00 Beijing 39 39 true 39 39 39.39 39.39 char39 39.000000000 39 39 ::39 +4 2017-10-01 2017-10-01T00:00 Beijing 4 4 true 4 4 4.4 4.4 char4 4.000000000 4 4 ::4 +40 2017-10-01 2017-10-01T00:00 Beijing 40 40 true 40 40 40.4 40.4 char40 40.000000000 40 40 ::40 +41 2017-10-01 2017-10-01T00:00 Beijing 41 41 true 41 41 41.41 41.41 char41 41.000000000 41 41 ::41 +42 2017-10-01 2017-10-01T00:00 Beijing 42 42 true 42 42 42.42 42.42 char42 42.000000000 42 42 ::42 +43 2017-10-01 2017-10-01T00:00 Beijing 43 43 true 43 43 43.43 43.43 char43 43.000000000 43 43 ::43 +44 2017-10-01 2017-10-01T00:00 Beijing 44 44 true 44 44 44.44 44.44 char44 44.000000000 44 44 ::44 +45 2017-10-01 2017-10-01T00:00 Beijing 45 45 true 45 45 45.45 45.45 char45 45.000000000 45 45 ::45 +46 2017-10-01 2017-10-01T00:00 Beijing 46 46 true 46 46 46.46 46.46 char46 46.000000000 46 46 ::46 +47 2017-10-01 2017-10-01T00:00 Beijing 47 47 true 47 47 47.47 47.47 char47 47.000000000 47 47 ::47 +48 2017-10-01 2017-10-01T00:00 Beijing 48 48 true 48 48 48.48 48.48 char48 48.000000000 48 48 ::48 +49 2017-10-01 2017-10-01T00:00 Beijing 49 49 true 49 49 49.49 49.49 char49 49.000000000 49 49 ::49 +5 2017-10-01 2017-10-01T00:00 Beijing 5 5 true 5 5 5.5 5.5 char5 5.000000000 5 5 ::5 +50 2017-10-01 2017-10-01T00:00 Beijing 50 50 true 50 50 50.5 50.5 char50 50.000000000 50 50 ::50 +51 2017-10-01 2017-10-01T00:00 Beijing 51 51 true 51 51 51.51 51.51 char51 51.000000000 51 51 ::51 +52 2017-10-01 2017-10-01T00:00 Beijing 52 52 true 52 52 52.52 52.52 char52 52.000000000 52 52 ::52 +53 2017-10-01 2017-10-01T00:00 Beijing 53 53 true 53 53 53.53 53.53 char53 53.000000000 53 53 ::53 +54 2017-10-01 2017-10-01T00:00 Beijing 54 54 true 54 54 54.54 54.54 char54 54.000000000 54 54 ::54 +55 2017-10-01 2017-10-01T00:00 Beijing 55 55 true 55 55 55.55 55.55 char55 55.000000000 55 55 ::55 +56 2017-10-01 2017-10-01T00:00 Beijing 56 56 true 56 56 56.56 56.56 char56 56.000000000 56 56 ::56 +57 2017-10-01 2017-10-01T00:00 Beijing 57 57 true 57 57 57.57 57.57 char57 57.000000000 57 57 ::57 +58 2017-10-01 2017-10-01T00:00 Beijing 58 58 true 58 58 58.58 58.58 char58 58.000000000 58 58 ::58 +59 2017-10-01 2017-10-01T00:00 Beijing 59 59 true 59 59 59.59 59.59 char59 59.000000000 59 59 ::59 +6 2017-10-01 2017-10-01T00:00 Beijing 6 6 true 6 6 6.6 6.6 char6 6.000000000 6 6 ::6 +60 2017-10-01 2017-10-01T00:00 Beijing 60 60 true 60 60 60.6 60.6 char60 60.000000000 60 60 ::60 +61 2017-10-01 2017-10-01T00:00 Beijing 61 61 true 61 61 61.61 61.61 char61 61.000000000 61 61 ::61 +62 2017-10-01 2017-10-01T00:00 Beijing 62 62 true 62 62 62.62 62.62 char62 62.000000000 62 62 ::62 +63 2017-10-01 2017-10-01T00:00 Beijing 63 63 true 63 63 63.63 63.63 char63 63.000000000 63 63 ::63 +64 2017-10-01 2017-10-01T00:00 Beijing 64 64 true 64 64 64.64 64.64 char64 64.000000000 64 64 ::64 +65 2017-10-01 2017-10-01T00:00 Beijing 65 65 true 65 65 65.65 65.65 char65 65.000000000 65 65 ::65 +66 2017-10-01 2017-10-01T00:00 Beijing 66 66 true 66 66 66.66 66.66 char66 66.000000000 66 66 ::66 +67 2017-10-01 2017-10-01T00:00 Beijing 67 67 true 67 67 67.67 67.67 char67 67.000000000 67 67 ::67 +68 2017-10-01 2017-10-01T00:00 Beijing 68 68 true 68 68 68.68 68.68 char68 68.000000000 68 68 ::68 +69 2017-10-01 2017-10-01T00:00 Beijing 69 69 true 69 69 69.69 69.69 char69 69.000000000 69 69 ::69 +7 2017-10-01 2017-10-01T00:00 Beijing 7 7 true 7 7 7.7 7.7 char7 7.000000000 7 7 ::7 +70 2017-10-01 2017-10-01T00:00 Beijing 70 70 true 70 70 70.7 70.7 char70 70.000000000 70 70 ::70 +71 2017-10-01 2017-10-01T00:00 Beijing 71 71 true 71 71 71.71 71.71 char71 71.000000000 71 71 ::71 +72 2017-10-01 2017-10-01T00:00 Beijing 72 72 true 72 72 72.72 72.72 char72 72.000000000 72 72 ::72 +73 2017-10-01 2017-10-01T00:00 Beijing 73 73 true 73 73 73.73 73.73 char73 73.000000000 73 73 ::73 +74 2017-10-01 2017-10-01T00:00 Beijing 74 74 true 74 74 74.74 74.74 char74 74.000000000 74 74 ::74 +75 2017-10-01 2017-10-01T00:00 Beijing 75 75 true 75 75 75.75 75.75 char75 75.000000000 75 75 ::75 +76 2017-10-01 2017-10-01T00:00 Beijing 76 76 true 76 76 76.76 76.76 char76 76.000000000 76 76 ::76 +77 2017-10-01 2017-10-01T00:00 Beijing 77 77 true 77 77 77.77 77.77 char77 77.000000000 77 77 ::77 +78 2017-10-01 2017-10-01T00:00 Beijing 78 78 true 78 78 78.78 78.78 char78 78.000000000 78 78 ::78 +79 2017-10-01 2017-10-01T00:00 Beijing 79 79 true 79 79 79.79 79.79 char79 79.000000000 79 79 ::79 +8 2017-10-01 2017-10-01T00:00 Beijing 8 8 true 8 8 8.8 8.8 char8 8.000000000 8 8 ::8 +80 2017-10-01 2017-10-01T00:00 Beijing 80 80 true 80 80 80.8 80.8 char80 80.000000000 80 80 ::80 +81 2017-10-01 2017-10-01T00:00 Beijing 81 81 true 81 81 81.81 81.81 char81 81.000000000 81 81 ::81 +82 2017-10-01 2017-10-01T00:00 Beijing 82 82 true 82 82 82.82 82.82 char82 82.000000000 82 82 ::82 +83 2017-10-01 2017-10-01T00:00 Beijing 83 83 true 83 83 83.83 83.83 char83 83.000000000 83 83 ::83 +84 2017-10-01 2017-10-01T00:00 Beijing 84 84 true 84 84 84.84 84.84 char84 84.000000000 84 84 ::84 +85 2017-10-01 2017-10-01T00:00 Beijing 85 85 true 85 85 85.85 85.85 char85 85.000000000 85 85 ::85 +86 2017-10-01 2017-10-01T00:00 Beijing 86 86 true 86 86 86.86 86.86 char86 86.000000000 86 86 ::86 +87 2017-10-01 2017-10-01T00:00 Beijing 87 87 true 87 87 87.87 87.87 char87 87.000000000 87 87 ::87 +88 2017-10-01 2017-10-01T00:00 Beijing 88 88 true 88 88 88.88 88.88 char88 88.000000000 88 88 ::88 +89 2017-10-01 2017-10-01T00:00 Beijing 89 89 true 89 89 89.89 89.89 char89 89.000000000 89 89 ::89 +9 2017-10-01 2017-10-01T00:00 Beijing 9 9 true 9 9 9.9 9.9 char9 9.000000000 9 9 ::9 +90 2017-10-01 2017-10-01T00:00 Beijing 90 90 true 90 90 90.9 90.9 char90 90.000000000 90 90 ::90 +91 2017-10-01 2017-10-01T00:00 Beijing 91 91 true 91 91 91.91 91.91 char91 91.000000000 91 91 ::91 +92 2017-10-01 2017-10-01T00:00 Beijing 92 92 true 92 92 92.92 92.92 char92 92.000000000 92 92 ::92 +93 2017-10-01 2017-10-01T00:00 Beijing 93 93 true 93 93 93.93 93.93 char93 93.000000000 93 93 ::93 +94 2017-10-01 2017-10-01T00:00 Beijing 94 94 true 94 94 94.94 94.94 char94 94.000000000 94 94 ::94 +95 2017-10-01 2017-10-01T00:00 Beijing 95 95 true 95 95 95.95 95.95 char95 95.000000000 95 95 ::95 +96 2017-10-01 2017-10-01T00:00 Beijing 96 96 true 96 96 96.96 96.96 char96 96.000000000 96 96 ::96 +97 2017-10-01 2017-10-01T00:00 Beijing 97 97 true 97 97 97.97 97.97 char97 97.000000000 97 97 ::97 +98 2017-10-01 2017-10-01T00:00 Beijing 98 98 true 98 98 98.98 98.98 char98 98.000000000 98 98 ::98 +99 2017-10-01 2017-10-01T00:00 Beijing 99 99 true 99 99 99.99 99.99 char99 99.000000000 99 99 ::99 diff --git a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out index 59e94ef9429ec9..784ad963ce4a72 100644 --- a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out +++ b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out @@ -30,14 +30,14 @@ 8 nereids \N -- !select_base2 -- -1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 -2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 -3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235e+38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 +1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1.000000000 1.000000000 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 +2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.940656458412465e-324 char2 100000000.000000000 100000000.000000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 +3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.402823E38 1.797693134862316e+308 char3 999999999.000000000 999999999.000000000 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 -- !select_tvf2 -- -1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 -2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 -3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235e+38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 +1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1.000000000 1.000000000 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 +2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.940656458412465e-324 char2 100000000.000000000 100000000.000000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 +3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.402823E38 1.797693134862316e+308 char3 999999999.000000000 999999999.000000000 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 -- !hive_docker_02 -- 1 2023-04-20 2023-04-20 2023-04-19 16:00:00.0 2023-04-19 16:00:00.0 2023-04-19 16:00:00.0 2023-04-19 16:00:00.0 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 @@ -75,14 +75,14 @@ 8 nereids \N -- !select_base2 -- -1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 -2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 -3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235e+38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 +1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1.000000000 1.000000000 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 +2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.940656458412465e-324 char2 100000000.000000000 100000000.000000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 +3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.402823E38 1.797693134862316e+308 char3 999999999.000000000 999999999.000000000 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 -- !select_tvf2 -- -1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 -2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 -3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235e+38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 +1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1.000000000 1.000000000 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 +2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.940656458412465e-324 char2 100000000.000000000 100000000.000000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 +3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.402823E38 1.797693134862316e+308 char3 999999999.000000000 999999999.000000000 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 -- !hive_docker_02 -- 1 2023-04-20 2023-04-20 2023-04-19 16:00:00.0 2023-04-19 16:00:00.0 2023-04-19 16:00:00.0 2023-04-19 16:00:00.0 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 diff --git a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.out b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.out similarity index 100% rename from regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.out rename to regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.out diff --git a/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out b/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out index 160c99248fe90c..9adea59bbfba3e 100644 --- a/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out +++ b/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out @@ -199,203 +199,3 @@ true 127 32767 2147483647 default 22.12345 3.141592653 99999.9999 default -- !hive_docker_ctas_types_02 -- true 127 32767 2147483647 default 22.12345 3.141592653 99999.9999 default --- !ctas_01 -- -2 -3 - --- !hive_docker_ctas_01 -- -2 -3 - --- !ctas_02 -- -2 -3 - --- !hive_docker_ctas_02 -- -2 -3 - --- !ctas_03 -- -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_03 -- -22 value_for_pt11 value_for_pt22 - --- !ctas_04 -- -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_04 -- -22 value_for_pt11 value_for_pt22 - --- !ctas_05 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_05 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !ctas_06 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_06 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !ctas_ex01 -- -2 -3 - --- !hive_docker_ctas_ex01 -- -2 -3 - --- !ctas_ex02 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 \N -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_ex02 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 __HIVE_DEFAULT_PARTITION__ -22 value_for_pt11 value_for_pt22 - --- !ctas_03 -- -\N another string value for col2 -\N string value for col2 -\N yet another string value for col2 - --- !hive_docker_ctas_ex03 -- -\N another string value for col2 -\N string value for col2 -\N yet another string value for col2 - --- !ctas_04 -- -\N 11 value_for_pt1 -\N 22 value_for_pt11 - --- !hive_docker_ctas_ex04 -- -\N 11 value_for_pt1 -\N 22 value_for_pt11 - --- !qualified_table1 -- -11 value_for_pt1 -22 value_for_pt11 - --- !qualified_table2 -- -11 value_for_pt1 -22 value_for_pt11 - --- !ctas_types_01 -- -true 127 32767 2147483647 9223372036854775807 default 22.12345 3.141592653 99999.9999 default default 2023-05-29 2023-05-29T23:19:34 - --- !hive_docker_ctas_types_01 -- -true 127 32767 2147483647 9223372036854775807 default 22.12345 3.141592653 99999.9999 default default 2023-05-29 2023-05-29 23:19:34.0 - --- !ctas_types_02 -- -true 127 32767 2147483647 default 22.12345 3.141592653 99999.9999 default - --- !hive_docker_ctas_types_02 -- -true 127 32767 2147483647 default 22.12345 3.141592653 99999.9999 default - --- !ctas_01 -- -2 -3 - --- !hive_docker_ctas_01 -- -2 -3 - --- !ctas_02 -- -2 -3 - --- !hive_docker_ctas_02 -- -2 -3 - --- !ctas_03 -- -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_03 -- -22 value_for_pt11 value_for_pt22 - --- !ctas_04 -- -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_04 -- -22 value_for_pt11 value_for_pt22 - --- !ctas_05 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_05 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !ctas_06 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_06 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 value_for_pt22 - --- !ctas_ex01 -- -2 -3 - --- !hive_docker_ctas_ex01 -- -2 -3 - --- !ctas_ex02 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 \N -22 value_for_pt11 value_for_pt22 - --- !hive_docker_ctas_ex02 -- -11 value_for_pt1 value_for_pt2 -22 value_for_pt11 __HIVE_DEFAULT_PARTITION__ -22 value_for_pt11 value_for_pt22 - --- !ctas_03 -- -\N another string value for col2 -\N string value for col2 -\N yet another string value for col2 - --- !hive_docker_ctas_ex03 -- -\N another string value for col2 -\N string value for col2 -\N yet another string value for col2 - --- !ctas_04 -- -\N 11 value_for_pt1 -\N 22 value_for_pt11 - --- !hive_docker_ctas_ex04 -- -\N 11 value_for_pt1 -\N 22 value_for_pt11 - --- !qualified_table1 -- -11 value_for_pt1 -22 value_for_pt11 - --- !qualified_table2 -- -11 value_for_pt1 -22 value_for_pt11 - --- !ctas_types_01 -- -true 127 32767 2147483647 9223372036854775807 default 22.12345 3.141592653 99999.9999 default default 2023-05-29 2023-05-29T23:19:34 - --- !hive_docker_ctas_types_01 -- -true 127 32767 2147483647 9223372036854775807 default 22.12345 3.141592653 99999.9999 default default 2023-05-29 2023-05-29 23:19:34.0 - --- !ctas_types_02 -- -true 127 32767 2147483647 default 22.12345 3.141592653 99999.9999 default - --- !hive_docker_ctas_types_02 -- -true 127 32767 2147483647 default 22.12345 3.141592653 99999.9999 default - diff --git a/regression-test/data/external_table_p0/hive/test_complex_types.out b/regression-test/data/external_table_p0/hive/test_complex_types.out index 4a9dbbe835c7fc..5f81514ea59b23 100644 --- a/regression-test/data/external_table_p0/hive/test_complex_types.out +++ b/regression-test/data/external_table_p0/hive/test_complex_types.out @@ -47,51 +47,3 @@ -- !date_dict -- 2036-12-28 1898-12-28 2539-12-28 --- !null_element_at -- -0 - --- !map_key_select -- -38111 0.770169659057425 - --- !map_keys -- -["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", "RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", "tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"] - --- !map_values -- -[0.9805502029231666, 0.5330291595754054, 0.3002474487337981, 0.4856360175030267, 0.7687106425158624, 0.6993506644925102, 0.2849354808825807, 0.3473417455186141, 0.1350012944304507, 0.9708132103700939, 0.1858304263994345, 0.4886337264552073, 0.3635474169515766, 0.5640845268971175, 0.1374134087807577, 0.7766547647451623, 0.5835323296668318, 0.3654459547110349, 0.5479776709993764, 0.8379932542117192, 0.1566504627835081, 0.03371222042250388, 0.1699781825927229, 0.3579630495075078, 0.02809253185597727, 0.7204247029840027, 0.2760499256423206, 0.676890893219096, 0.03529878656700025, 0.02276578351027858, 0.09794991730625469, 0.5278062884613351, 0.1370404181139102, 0.5440352476580856, 0.7205540629419929, 0.1350852984195943, 0.4160946400431862, 0.2972295454562929, 0.9217426503585693, 0.58103998733474, 0.8845427436377473, 0.1017928267299423, 0.9547186973943892, 0.1680102784708342, 0.0008487745421986714, 0.1695241541106989, 0.6783921749433292, 0.7193818386971084, 0.930443435029246, 0.4846665469390518, 0.9924998940864419, 0.7238288481079148, 0.7053563817759009, 0.9735160772776755, 0.7782499787869234, 0.7413304280548174, 0.7550983926033307, 0.8713660446322186, 0.9205209678792637, 0.3419724898972277, 0.3696806985755556, 0.03023259817152302, 0.02477452604862684, 0.9764129157525588, 0.5933057559470283, 0.7612511554831843, 0.378758227033635, 0.9312730459544121, 0.6712083507802412, 0.165080800084368, 0.2292866463959062, 0.3736665350268106, 0.2048064464080658, 0.08394355937496834, 0.8494979696731824, 0.4321556255662622, 0.3534668267198027, 0.8791700434102772, 0.2274527583015258, 0.04886968507359402, 0.7936598110174163, 0.5449717343415919, 0.7635939445968348, 0.08505586183986624, 0.3509115026589145, 0.9633191745238908, 0.3972533910389617, 0.4659759249919267, 0.1579051246328464, 0.7853565578107594, 0.9894919939745654, 0.9395365730655929, 0.202260767382666, 0.1619636856192768, 0.5105569529841616, 0.4531109229280732, 0.2579134268597084, 0.7962109089915747, 0.2772969229539421, 0.9315902037607061] - --- !map_contains_key -- -1077 [0.7805560995873845, 0.9303489002269559, 0.2529522997521877, 0.662270811026298, 0.664725297532439, 0.1019441091764477, 0.9614059300688174, 0.5278126009983843, 0.5287505841216708, 0.426116738236779, 0.4230050239387118, 0.5327026330053651, 0.6025481777942603, 0.2710733647257627, 0.613792118138183, 0.002100302783562991, 0.3200675048728582, 0.5485611014660204, 0.5121510581313707, 0.5145136652805358] {"9wXr9n-TBm9Wyt-r8H-SkAq":0.9338329010480995, "CPDH4G-ZXGPkku-3wY-ktaQ":0.4355256963350881, "RvNlMt-HHjHN5M-VjP-xHAI":0.3263474611804782, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.09375622010822238, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.3448233486447311, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.3857021891084336, "msuFIN-ZkKO8TY-tu4-veH0":0.6646172653074628, "0rSUyl-Un07aIW-KAx-WHnX":0.3558009910430974, "XvbmO8-WA6oAqc-ihc-s8IL":0.4058206434411423, "G6B6RD-AicAlZb-16u-Pn1I":0.7203554946895749, "coDK0Q-tMg1294-JMQ-ZWQu":0.8236328627743186, "4c0aWh-yhL6BOX-rRu-1n0r":0.1398091184230428, "G4iUcG-ZhWw62v-VLt-n6lH":0.1838288978254214, "IIB7qD-WQistwT-Vux-0c9B":0.9174389144309458, "7cTyuR-5ssXm2S-sJR-JTIZ":0.8132237242672837, "3KPhSW-FICEImf-bba-PCiQ":0.6302643579943553, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.9251468490326916, "1zSj57-nNZpZ0b-ZKn-BeY0":0.5628463109107144, "sTK0mn-wkp1Xp5-PRS-txVM":0.7905808129559996, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.4234598677670157, "UkYdkP-k7YKiKS-Fxp-qAcI":0.7541401266679869, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.2931152565110683, "RJdTav-jk3os9Z-yRk-WhwV":0.5263811309738877, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.05924766959664352, "o3LUyz-7Toh54O-czG-Xep8":0.6284193821127264, "8fzHhM-4otPAss-qTm-phg8":0.8953002441537012, "kZsHhe-vfClpAR-b3H-7aHl":0.1775015612747399, "TdZnlG-BUgMs7Z-iBM-9c3v":0.2749839439504633, "RipJXn-p4gZkyy-1ZY-xkWe":0.05461626895038973, "ke730M-LmMjGdc-EFy-0LUK":0.3078176183644828, "jBSExJ-GXTc5TB-NSa-xBEd":0.6617827850054024, "kI7Cc8-DSg5RdF-qLo-2bhe":0.9835707461323488, "bAn3VI-x6xXWpB-zWe-G5CJ":0.2179821229979456, "jAil30-kbt6K6z-kbr-8foB":0.9788066977245138, "IHIwNs-1QGqy8l-i8i-vu4G":0.4967939741245059, "p0IbZr-tHCtwiV-0hq-NtIt":0.05018379510905702, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.9024855415553655, "xnO3Fi-8rXcpgj-zpm-EmuX":0.2052911881746857, "5w57da-phYtDUx-px2-6frG":0.2969063879156597, "31MfFs-1WyUAr6-gQ0-xLxY":0.4879555128313509, "ryBl2p-rSoPhwd-WPv-NCAU":0.7954485484495807, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.6150208673719357, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.7608999632369456, "MveBhC-g29c0dU-tCT-R6nC":0.3345734028221851, "JTpxue-xSqAhGo-AZk-zB1t":0.3504030277488054, "92TVdU-qDJesPN-0lb-JOd3":0.7387694998319805, "0PODnh-IciBdOZ-0CS-oNeL":0.9515905965769644, "KkkW6x-TiemXQw-OiH-dZ9s":0.4082412331999081, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.7521928530356236, "KHvMCD-OQDL0eX-nqK-TmEt":0.1309616727896826, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.7007110387725962, "qA9ycc-sR2qm6P-PtB-AIax":0.4462977655645909, "uDeuEb-B0t0Ljr-dWk-jkC4":0.6904672767407958, "5vPy52-ygN0MMH-UB4-nZQL":0.6057596542200021, "zbbmrQ-pT3uAuU-Kae-HjM5":0.9812657498686279, "3QShHS-7RwUB10-0W2-H4Qy":0.4155760848860853, "PMc4QI-5lNajXU-f8m-RGIi":0.7046420976800288, "O9t3dl-q8YHozj-saR-A3Jm":0.8543344954196586, "k4eH3O-aHnTKY7-ADp-4Vsi":0.2655832454718557, "RA4epe-lWWnOff-bpM-bSR4":0.7523252210222883, "6ysu2R-gSc5dwU-cv0-LqCJ":0.7830899322716732, "tVl3TY-o42NMVO-k3S-iqOY":0.7923823401215799, "NMgTrr-W1RrCvP-Zaf-paL7":0.4686928654756936, "d1CJmF-CeG5asM-xms-1dwN":0.7622908781076493, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.4999285217445154, "XLzwK0-6ocGDrS-TtU-wlEI":0.3985354402705095, "XDgZfb-Sxc45Zn-mVO-S2QO":0.05791580337644187, "GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.2194407595305434, "1qU9pA-QJGAna9-JoG-H7GS":0.8877401947295382, "rKIkxA-UnGWYSn-0li-ziuB":0.1607906275036466, "tbPazx-IjUrQ8J-NZe-VOPL":0.6809166916797593, "xBpSIv-U6ojkK7-9p5-LviD":0.1195672647379901, "88bnWI-pxrKa7T-n2d-tXk9":0.1956068951787721, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.6840816888752089, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.1542847609246678, "LSyNyi-tBZUx1l-hAj-mwsx":0.304034328298701, "2c9aTP-hXloMK7-ufH-dgq6":0.1016852552953107, "aXksHO-zARQxfo-sgS-8Bf4":0.5490533082019959, "ioOXAL-eVUF0W8-vZx-ZeYX":0.4528164038481785, "DXUkAP-A7SqnHj-V4U-PJfz":0.3607407447425939, "cnzZXk-AOMepfN-hym-qbDH":0.4587361500592568, "CMlAd6-8FF1yXs-fae-Izfv":0.07555019720825917, "qiXnUv-e2PsJWm-tLF-KpjE":0.9409681065363688, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.7492793312178226} {"name":"r8HXXQM4XHoI", "age":238221053, "tip":"2023-07-26 15:40:37.694000"} - --- !array_max -- -11028 - --- !null_element_at_orc -- -0 - --- !map_key_select_orc -- -38111 0.770169659057425 - --- !map_keys_orc -- -["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", "RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", "tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"] - --- !map_values_orc -- -[0.9805502029231666, 0.5330291595754054, 0.3002474487337981, 0.4856360175030267, 0.7687106425158624, 0.6993506644925102, 0.2849354808825807, 0.3473417455186141, 0.1350012944304507, 0.9708132103700939, 0.1858304263994345, 0.4886337264552073, 0.3635474169515766, 0.5640845268971175, 0.1374134087807577, 0.7766547647451623, 0.5835323296668318, 0.3654459547110349, 0.5479776709993764, 0.8379932542117192, 0.1566504627835081, 0.03371222042250388, 0.1699781825927229, 0.3579630495075078, 0.02809253185597727, 0.7204247029840027, 0.2760499256423206, 0.676890893219096, 0.03529878656700025, 0.02276578351027858, 0.09794991730625469, 0.5278062884613351, 0.1370404181139102, 0.5440352476580856, 0.7205540629419929, 0.1350852984195943, 0.4160946400431862, 0.2972295454562929, 0.9217426503585693, 0.58103998733474, 0.8845427436377473, 0.1017928267299423, 0.9547186973943892, 0.1680102784708342, 0.0008487745421986714, 0.1695241541106989, 0.6783921749433292, 0.7193818386971084, 0.930443435029246, 0.4846665469390518, 0.9924998940864419, 0.7238288481079148, 0.7053563817759009, 0.9735160772776755, 0.7782499787869234, 0.7413304280548174, 0.7550983926033307, 0.8713660446322186, 0.9205209678792637, 0.3419724898972277, 0.3696806985755556, 0.03023259817152302, 0.02477452604862684, 0.9764129157525588, 0.5933057559470283, 0.7612511554831843, 0.378758227033635, 0.9312730459544121, 0.6712083507802412, 0.165080800084368, 0.2292866463959062, 0.3736665350268106, 0.2048064464080658, 0.08394355937496834, 0.8494979696731824, 0.4321556255662622, 0.3534668267198027, 0.8791700434102772, 0.2274527583015258, 0.04886968507359402, 0.7936598110174163, 0.5449717343415919, 0.7635939445968348, 0.08505586183986624, 0.3509115026589145, 0.9633191745238908, 0.3972533910389617, 0.4659759249919267, 0.1579051246328464, 0.7853565578107594, 0.9894919939745654, 0.9395365730655929, 0.202260767382666, 0.1619636856192768, 0.5105569529841616, 0.4531109229280732, 0.2579134268597084, 0.7962109089915747, 0.2772969229539421, 0.9315902037607061] - --- !map_contains_key_orc -- -1077 [0.7805560995873845, 0.9303489002269559, 0.2529522997521877, 0.662270811026298, 0.664725297532439, 0.1019441091764477, 0.9614059300688174, 0.5278126009983843, 0.5287505841216708, 0.426116738236779, 0.4230050239387118, 0.5327026330053651, 0.6025481777942603, 0.2710733647257627, 0.613792118138183, 0.002100302783562991, 0.3200675048728582, 0.5485611014660204, 0.5121510581313707, 0.5145136652805358] {"9wXr9n-TBm9Wyt-r8H-SkAq":0.9338329010480995, "CPDH4G-ZXGPkku-3wY-ktaQ":0.4355256963350881, "RvNlMt-HHjHN5M-VjP-xHAI":0.3263474611804782, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.09375622010822238, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.3448233486447311, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.3857021891084336, "msuFIN-ZkKO8TY-tu4-veH0":0.6646172653074628, "0rSUyl-Un07aIW-KAx-WHnX":0.3558009910430974, "XvbmO8-WA6oAqc-ihc-s8IL":0.4058206434411423, "G6B6RD-AicAlZb-16u-Pn1I":0.7203554946895749, "coDK0Q-tMg1294-JMQ-ZWQu":0.8236328627743186, "4c0aWh-yhL6BOX-rRu-1n0r":0.1398091184230428, "G4iUcG-ZhWw62v-VLt-n6lH":0.1838288978254214, "IIB7qD-WQistwT-Vux-0c9B":0.9174389144309458, "7cTyuR-5ssXm2S-sJR-JTIZ":0.8132237242672837, "3KPhSW-FICEImf-bba-PCiQ":0.6302643579943553, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.9251468490326916, "1zSj57-nNZpZ0b-ZKn-BeY0":0.5628463109107144, "sTK0mn-wkp1Xp5-PRS-txVM":0.7905808129559996, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.4234598677670157, "UkYdkP-k7YKiKS-Fxp-qAcI":0.7541401266679869, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.2931152565110683, "RJdTav-jk3os9Z-yRk-WhwV":0.5263811309738877, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.05924766959664352, "o3LUyz-7Toh54O-czG-Xep8":0.6284193821127264, "8fzHhM-4otPAss-qTm-phg8":0.8953002441537012, "kZsHhe-vfClpAR-b3H-7aHl":0.1775015612747399, "TdZnlG-BUgMs7Z-iBM-9c3v":0.2749839439504633, "RipJXn-p4gZkyy-1ZY-xkWe":0.05461626895038973, "ke730M-LmMjGdc-EFy-0LUK":0.3078176183644828, "jBSExJ-GXTc5TB-NSa-xBEd":0.6617827850054024, "kI7Cc8-DSg5RdF-qLo-2bhe":0.9835707461323488, "bAn3VI-x6xXWpB-zWe-G5CJ":0.2179821229979456, "jAil30-kbt6K6z-kbr-8foB":0.9788066977245138, "IHIwNs-1QGqy8l-i8i-vu4G":0.4967939741245059, "p0IbZr-tHCtwiV-0hq-NtIt":0.05018379510905702, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.9024855415553655, "xnO3Fi-8rXcpgj-zpm-EmuX":0.2052911881746857, "5w57da-phYtDUx-px2-6frG":0.2969063879156597, "31MfFs-1WyUAr6-gQ0-xLxY":0.4879555128313509, "ryBl2p-rSoPhwd-WPv-NCAU":0.7954485484495807, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.6150208673719357, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.7608999632369456, "MveBhC-g29c0dU-tCT-R6nC":0.3345734028221851, "JTpxue-xSqAhGo-AZk-zB1t":0.3504030277488054, "92TVdU-qDJesPN-0lb-JOd3":0.7387694998319805, "0PODnh-IciBdOZ-0CS-oNeL":0.9515905965769644, "KkkW6x-TiemXQw-OiH-dZ9s":0.4082412331999081, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.7521928530356236, "KHvMCD-OQDL0eX-nqK-TmEt":0.1309616727896826, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.7007110387725962, "qA9ycc-sR2qm6P-PtB-AIax":0.4462977655645909, "uDeuEb-B0t0Ljr-dWk-jkC4":0.6904672767407958, "5vPy52-ygN0MMH-UB4-nZQL":0.6057596542200021, "zbbmrQ-pT3uAuU-Kae-HjM5":0.9812657498686279, "3QShHS-7RwUB10-0W2-H4Qy":0.4155760848860853, "PMc4QI-5lNajXU-f8m-RGIi":0.7046420976800288, "O9t3dl-q8YHozj-saR-A3Jm":0.8543344954196586, "k4eH3O-aHnTKY7-ADp-4Vsi":0.2655832454718557, "RA4epe-lWWnOff-bpM-bSR4":0.7523252210222883, "6ysu2R-gSc5dwU-cv0-LqCJ":0.7830899322716732, "tVl3TY-o42NMVO-k3S-iqOY":0.7923823401215799, "NMgTrr-W1RrCvP-Zaf-paL7":0.4686928654756936, "d1CJmF-CeG5asM-xms-1dwN":0.7622908781076493, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.4999285217445154, "XLzwK0-6ocGDrS-TtU-wlEI":0.3985354402705095, "XDgZfb-Sxc45Zn-mVO-S2QO":0.05791580337644187, "GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.2194407595305434, "1qU9pA-QJGAna9-JoG-H7GS":0.8877401947295382, "rKIkxA-UnGWYSn-0li-ziuB":0.1607906275036466, "tbPazx-IjUrQ8J-NZe-VOPL":0.6809166916797593, "xBpSIv-U6ojkK7-9p5-LviD":0.1195672647379901, "88bnWI-pxrKa7T-n2d-tXk9":0.1956068951787721, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.6840816888752089, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.1542847609246678, "LSyNyi-tBZUx1l-hAj-mwsx":0.304034328298701, "2c9aTP-hXloMK7-ufH-dgq6":0.1016852552953107, "aXksHO-zARQxfo-sgS-8Bf4":0.5490533082019959, "ioOXAL-eVUF0W8-vZx-ZeYX":0.4528164038481785, "DXUkAP-A7SqnHj-V4U-PJfz":0.3607407447425939, "cnzZXk-AOMepfN-hym-qbDH":0.4587361500592568, "CMlAd6-8FF1yXs-fae-Izfv":0.07555019720825917, "qiXnUv-e2PsJWm-tLF-KpjE":0.9409681065363688, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.7492793312178226} {"name":"r8HXXQM4XHoI", "age":238221053, "tip":"2023-07-26 15:40:37.694000"} - --- !array_max_orc -- -11028 - --- !offsets_check -- -0 [1, 2] [[], [3], null] {"a":1, "b":2} {"s1":"e", "s2":null} -1 [] [] {} \N -2 \N \N \N {"s1":"h", "s2":10} -3 [5, null] [[6, 7], [8, null], null] {"f":1, "g":null} {"s1":null, "s2":9} - --- !map_with_nullable_key -- -\N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa test 3 4 5.1 6.2 true false -1.2 12.30 -1234.5678 123456789.12340000 -1234567890.12345678 1234567890123456789012.1234567800000000 test2 {"test":"test"} {"test":"test"} {"test":"test"} {3:3} {4:4} {5:5} {6:6} {1:1} {-1.2:-1.2} {12.30:12.30} {-1234.5678:-1234.5678} {123456789.12340000:123456789.12340000} {-1234567890.12345678:-1234567890.12345678} {1234567890123456789012.1234567800000000:1234567890123456789012.1234567800000000} ["test"] [3] [4] [5] [6] [1] ["test"] ["test"] [-1.2] [12.30] [-1234.5678] [123456789.12340000] [-1234567890.12345678] [1234567890123456789012.1234567800000000] {"s_bigint":1} {"test":[{"s_int":1}]} {"struct_field":["1", "2", "3"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":null, "struct_non_nulls_after_nulls2":"some string"} {"struct_field1":null, "struct_field2":"some string", "strict_field3":{"nested_struct_field1":null, "nested_struct_field2":"nested_string2"}} {"k1":"v1", "k2":null, "k3":"v3"} [null, "test"] ["test-1", null, "test-2"] ["test", null] [null, null, null] - --- !date_dict -- -2036-12-28 1898-12-28 2539-12-28 - diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out index 8a104343fc4e10..57367a3bf0999c 100644 --- a/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out +++ b/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out @@ -123,127 +123,3 @@ a126 15 2017-09-13 2009-09-21T04:23:14.309124 2024-03-23 2024-02-01T21:11:09.170 --- !q01 -- -zhangsan 1 -lisi 1 - --- !q02 -- -1 1 -2 1 -3 1 -4 1 - --- !q03 -- -123 china 4 56 sc -234 america 5 67 ls -345 cana 4 56 fy -567 fre 7 89 pa - --- !q04 -- -p_partkey2 p_name2 p_mfgr2 p_brand2 p_type2 p_size2 p_con2 p_r_price2 p_comment2 -p_partkey1 p_name1 p_mfgr1 p_brand1 p_type1 p_size1 p_con1 p_r_price1 p_comment1 -p_partkey0 p_name0 p_mfgr0 p_brand0 p_type0 p_size0 p_con0 p_r_price0 p_comment0 - --- !q05 -- -batchno appsheet_no filedate t_no tano t_name chged_no mob_no2 home_no off_no -off_no home_no mob_no2 chged_no t_name tano t_no filedate appsheet_no batchno - --- !q06 -- -bill_code dates ord_year ord_month ord_quarter on_time - --- !q07 -- -2 - --- !q08 -- -123 zhangsan 12 123.45 2022-01-01 -124 lisi 12 123.45 2022-01-01 -125 lisan 12 123.45 2022-01-02 - --- !q09 -- -a123 12 -a124 13 -a125 14 -a126 15 - --- !par_fields_in_file_orc1 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet1 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc2 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet2 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc3 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet3 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc4 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet4 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc5 -- - --- !par_fields_in_file_parquet5 -- - --- !par_fields_in_file_orc1 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet1 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc2 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet2 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc3 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet3 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc4 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_parquet4 -- -1 Alice 100.0 2023 8 -2 Bob 150.0 2023 8 - --- !par_fields_in_file_orc5 -- - --- !par_fields_in_file_parquet5 -- - --- !parquet_adjusted_utc -- -1997-09-21 1999-01-12T15:12:31.235784 -1998-01-12 1993-06-11T11:33:12.356500 -2002-09-29 2001-01-17T21:23:42.120 -2008-08-07 2023-09-23T11:12:17.458 -2009-11-13 2011-11-12T01:23:06.986 -2012-07-08 2023-11-09T20:21:16.321 -2017-09-13 2009-09-21T04:23:14.309124 -2024-03-23 2024-02-01T21:11:09.170 - diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out index 0402feef40e6b5..deda902d300d43 100644 --- a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out +++ b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out @@ -119,123 +119,3 @@ -- !q06 -- 2023-01-03T00:00 100 0.3 test3 --- !q01 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N -0.3 test3 2023-01-03T00:00 100 - --- !q02 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N - --- !q03 -- -0.3 test3 2023-01-03T00:00 100 - --- !q04 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 -2023-01-03T00:00 100 0.3 test3 - --- !q05 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 - --- !q06 -- -2023-01-03T00:00 100 0.3 test3 - --- !q01 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N -0.3 test3 2023-01-03T00:00 100 - --- !q02 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N - --- !q03 -- -0.3 test3 2023-01-03T00:00 100 - --- !q04 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 -2023-01-03T00:00 100 0.3 test3 - --- !q05 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 - --- !q06 -- -2023-01-03T00:00 100 0.3 test3 - --- !q01 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N -0.3 test3 2023-01-03T00:00 100 - --- !q02 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N - --- !q03 -- -0.3 test3 2023-01-03T00:00 100 - --- !q04 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 -2023-01-03T00:00 100 0.3 test3 - --- !q05 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 - --- !q06 -- -2023-01-03T00:00 100 0.3 test3 - --- !q01 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N -0.3 test3 2023-01-03T00:00 100 - --- !q02 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N - --- !q03 -- -0.3 test3 2023-01-03T00:00 100 - --- !q04 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 -2023-01-03T00:00 100 0.3 test3 - --- !q05 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 - --- !q06 -- -2023-01-03T00:00 100 0.3 test3 - --- !q01 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N -0.3 test3 2023-01-03T00:00 100 - --- !q02 -- -0.1 test1 2023-01-01T00:00 \N -0.2 test2 2023-01-02T00:00 \N - --- !q03 -- -0.3 test3 2023-01-03T00:00 100 - --- !q04 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 -2023-01-03T00:00 100 0.3 test3 - --- !q05 -- -2023-01-01T00:00 \N 0.1 test1 -2023-01-02T00:00 \N 0.2 test2 - --- !q06 -- -2023-01-03T00:00 100 0.3 test3 - diff --git a/regression-test/data/external_table_p0/hive/test_hive_compress_type.out b/regression-test/data/external_table_p0/hive/test_hive_compress_type.out index ca9ca885c5b854..ee4c9a8f2731ba 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_compress_type.out +++ b/regression-test/data/external_table_p0/hive/test_hive_compress_type.out @@ -1,486 +1,440 @@ -- This file is automatically generated. You should know what you did if you want to edit this --- !q21 -- -600005 - --- !q22 -- -1510010 - --- !q23 -- -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 2023-08-21 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 bzip2 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 bzip2 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 deflate -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 deflate -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 gzip -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 gzip -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 lz4 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 plain -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 plain -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 snappy - --- !q31 -- -600005 - --- !q32 -- -1510010 - --- !q33 -- -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 2023-08-21 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 bzip2 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 bzip2 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 deflate -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 deflate -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 gzip -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 gzip -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 lz4 -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 mix -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 plain -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 plain -4611870011201662970 0 HD Tube 5* 1 2014-03-22T05:11:29 2014-03-22 598875 4243808759 92f6fe1be9b9773206d6b63e50feb470 196 2314158381335918424 0 3 3 http://public_search yandex.ru.livemaster 0 0 [] [4,15,333,3912,14512,12818] [18,348,1010] [] 1846 952 29 10 1 0.77 0 0 24 73d7 1 1 0 0 3238011 0 0 0 0 1119 641 157 2014-03-22T19:51:48 0 0 0 0 utf-8 330 0 0 0 7774109565808082252 11274076 0 0 0 0 0 E 2014-03-22T11:54:54 55 2 3 4 6 [105,11,9,88,45,14,98,72,3,925,2193,6,25,1] 3137666015 cc184643699dccab8d5d4af796c47449 -1 -1 -1 nD Tp 0 -1 0 0 81 0 0 0 -1 -1 -1 -1 -1 -1 -1 -1 0 0 07d21f 0 [] 0 15284527577228392792 14270691585016129648 0 0 [] [] [] [] [] \N c1889e2b9ad1e219ed04c0e9624b5139 1404 0 snappy - -- !q42 -- 215 -- !q43 -- 1 100 5 1000000000 10.5 20.75 true First A Alpha 2023-10-06 2023-10-06T14:30 123.45 -1 578 55 2111222273 56.858597 82.38111658179561 true Random C LYDUG 2023-12-17 2023-12-05T13:04:58 1393.11 -1 979 44 10163954251 28.827957 57.56879940298416 true Random Q DNRGE 2023-12-09 2023-12-10T20:21:58 1581.25 +1 578 55 2111222273 56.8586 82.38111658179561 true Random C LYDUG 2023-12-17 2023-12-05T13:04:58 1393.11 +1 979 44 10163954251 28.82796 57.56879940298416 true Random Q DNRGE 2023-12-09 2023-12-10T20:21:58 1581.25 10 1000 50 10000000000 55.25 65.75 false Tenth J Kappa 2023-10-15 2023-10-15T23:30 1012.34 -10 210 26 8549838179 23.438345 73.36477128189287 true Random N VVXIF 2023-11-24 2023-12-13T18:04:58 226.65 -10 386 51 1214815770 13.959902 36.64197990482059 false Random J ORLGI 2023-12-18 2023-11-27T17:13:58 852.62 -10 966 38 2203748112 45.555325 27.908447208440094 true Random W LFAGO 2023-12-14 2023-11-26T20:00:58 1898.68 +10 210 26 8549838179 23.43834 73.36477128189287 true Random N VVXIF 2023-11-24 2023-12-13T18:04:58 226.65 +10 386 51 1214815770 13.9599 36.64197990482059 false Random J ORLGI 2023-12-18 2023-11-27T17:13:58 852.62 +10 966 38 2203748112 45.55532 27.90844720844009 true Random W LFAGO 2023-12-14 2023-11-26T20:00:58 1898.68 100 281 26 3174393241 51.05278 52.09566669589555 false Random F SLDWB 2023-12-14 2023-12-12T07:03:58 798.30 100 289 71 4919981667 66.56684 69.73132704711037 true Random V QOLAP 2023-12-17 2023-12-23T09:38:58 217.05 -11 1100 55 11000000000 60.5 70.0 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 -11 426 67 8473986652 17.942455 71.80682514420877 true Random X FXDUV 2023-12-04 2023-12-22T07:51:58 129.81 -11 441 19 7370044350 74.261696 62.013817404758086 true Random D UYKZA 2023-12-23 2023-12-15T11:49:58 1805.14 -11 487 27 14556302216 85.33334 62.596750833474495 true Random E QMHJD 2023-12-23 2023-12-24T08:30:58 1491.22 -11 770 17 7962512669 12.508753 83.33847413902296 true Random P LHJRA 2023-12-06 2023-12-04T15:48:58 970.51 +11 1100 55 11000000000 60.5 70 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 +11 426 67 8473986652 17.94246 71.80682514420877 true Random X FXDUV 2023-12-04 2023-12-22T07:51:58 129.81 +11 441 19 7370044350 74.2617 62.01381740475809 true Random D UYKZA 2023-12-23 2023-12-15T11:49:58 1805.14 +11 487 27 14556302216 85.33334 62.59675083347449 true Random E QMHJD 2023-12-23 2023-12-24T08:30:58 1491.22 +11 770 17 7962512669 12.50875 83.33847413902296 true Random P LHJRA 2023-12-06 2023-12-04T15:48:58 970.51 12 1200 60 12000000000 65.75 75.25 false Twelfth L Mu 2023-10-17 2023-10-17T02:15 1234.56 -12 751 8 12205294947 23.468674 64.35048302450815 true Random K FCSBV 2023-12-03 2023-12-17T01:10:58 325.26 +12 751 8 12205294947 23.46867 64.35048302450815 true Random K FCSBV 2023-12-03 2023-12-17T01:10:58 325.26 12 782 48 5080583047 75.55138 49.6324463213595 true Random N WYJDW 2023-12-16 2023-12-18T02:58:58 944.42 -12 987 73 1432735571 40.308147 43.5019559828596 true Random S MZUNG 2023-12-07 2023-12-03T13:42:58 215.12 +12 987 73 1432735571 40.30815 43.5019559828596 true Random S MZUNG 2023-12-07 2023-12-03T13:42:58 215.12 13 1300 65 13000000000 70.0 80.5 true Thirteenth M Nu 2023-10-18 2023-10-18T03:30 1345.67 -13 335 39 13869202091 30.426075 39.02304533093442 true Random L AULCC 2023-12-08 2023-12-13T00:26:58 387.97 +13 335 39 13869202091 30.42607 39.02304533093442 true Random L AULCC 2023-12-08 2023-12-13T00:26:58 387.97 13 402 30 10851194313 74.82481 74.90108005771035 false Random F GEMMK 2023-11-27 2023-12-21T15:03:58 1643.55 -13 503 34 6763884255 23.660393 63.9797872103468 true Random S POEBK 2023-12-22 2023-12-23T23:16:58 486.62 -13 696 74 3370487489 84.544014 88.69976219408227 true Random H RTFJI 2023-11-23 2023-11-25T07:32:58 1761.50 -13 745 48 13047949175 51.168613 85.21972389262197 true Random A AYBWQ 2023-12-22 2023-12-22T16:25:58 1192.48 -13 859 65 7433576046 56.136265 34.87823331022725 false Random L CRFUF 2023-12-23 2023-12-12T15:05:58 1037.15 +13 503 34 6763884255 23.66039 63.9797872103468 true Random S POEBK 2023-12-22 2023-12-23T23:16:58 486.62 +13 696 74 3370487489 84.54401 88.69976219408227 true Random H RTFJI 2023-11-23 2023-11-25T07:32:58 1761.50 +13 745 48 13047949175 51.16861 85.21972389262197 true Random A AYBWQ 2023-12-22 2023-12-22T16:25:58 1192.48 +13 859 65 7433576046 56.13626 34.87823331022725 false Random L CRFUF 2023-12-23 2023-12-12T15:05:58 1037.15 14 1400 70 14000000000 75.25 85.75 false Fourteenth N Xi 2023-10-19 2023-10-19T04:45 1456.78 -14 195 17 2370700139 16.777058 64.81793301410002 false Random P IIGRE 2023-12-12 2023-12-14T22:40:58 1678.44 -14 966 65 7828602539 62.430664 68.85873133439297 true Random I VVOQH 2023-12-01 2023-12-06T00:54:58 1300.43 -14 968 16 11314514196 62.509666 33.1841427251225 false Random T WDEVJ 2023-11-24 2023-12-06T17:54:58 431.61 -15 1500 75 15000000000 80.5 90.0 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 +14 195 17 2370700139 16.77706 64.81793301410002 false Random P IIGRE 2023-12-12 2023-12-14T22:40:58 1678.44 +14 966 65 7828602539 62.43066 68.85873133439297 true Random I VVOQH 2023-12-01 2023-12-06T00:54:58 1300.43 +14 968 16 11314514196 62.50967 33.1841427251225 false Random T WDEVJ 2023-11-24 2023-12-06T17:54:58 431.61 +15 1500 75 15000000000 80.5 90 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 15 703 67 4284267079 85.38059 91.41088583496226 true Random T PHZRC 2023-12-04 2023-12-08T15:54:58 185.19 -16 135 22 7901304568 43.944805 85.16901944253635 true Random K NUQEP 2023-11-29 2023-11-25T23:42:58 1440.74 +16 135 22 7901304568 43.94481 85.16901944253635 true Random K NUQEP 2023-11-29 2023-11-25T23:42:58 1440.74 16 615 20 12294128025 77.37379 20.42772029677839 true Random U JHPOB 2023-11-30 2023-12-16T14:29:58 1105.33 -17 289 49 13560709243 39.952793 38.245306832599425 true Random Q QEYVY 2023-12-19 2023-12-07T00:35:58 500.19 -17 499 46 11230409207 51.632103 28.811164197154774 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 +17 289 49 13560709243 39.95279 38.24530683259943 true Random Q QEYVY 2023-12-19 2023-12-07T00:35:58 500.19 +17 499 46 11230409207 51.6321 28.81116419715477 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 17 646 62 11234805830 76.40492 67.46425239009778 true Random N REHZC 2023-12-09 2023-11-28T02:06:58 365.15 17 698 55 1807368797 20.17171 43.84496606184709 true Random P SHSJV 2023-12-01 2023-11-25T11:56:58 810.95 17 794 14 8377523030 28.07663 52.3837762020057 false Random E WPMIN 2023-12-03 2023-11-26T04:59:58 239.42 17 913 32 4647929554 78.91502 70.54487265463735 true Random S WFPNS 2023-11-27 2023-11-26T03:29:58 321.45 -18 690 17 1399456103 63.261967 42.964715823771236 true Random R BWSRS 2023-12-13 2023-12-23T08:33:58 1840.02 -18 835 17 14265814864 18.923101 80.53531451138412 true Random V PIKUZ 2023-12-20 2023-12-21T07:39:58 1167.09 -19 917 66 2340946367 89.035675 22.649362455875274 false Random D HWHMU 2023-11-30 2023-12-10T02:36:58 1960.07 -19 993 13 7039833438 79.769066 69.79049291517285 true Random X OFSUV 2023-12-11 2023-12-08T01:46:58 1958.95 +18 690 17 1399456103 63.26197 42.96471582377124 true Random R BWSRS 2023-12-13 2023-12-23T08:33:58 1840.02 +18 835 17 14265814864 18.9231 80.53531451138412 true Random V PIKUZ 2023-12-20 2023-12-21T07:39:58 1167.09 +19 917 66 2340946367 89.03568 22.64936245587527 false Random D HWHMU 2023-11-30 2023-12-10T02:36:58 1960.07 +19 993 13 7039833438 79.76907 69.79049291517285 true Random X OFSUV 2023-12-11 2023-12-08T01:46:58 1958.95 2 200 10 2000000000 15.75 25.5 false Second B Beta 2023-10-07 2023-10-07T15:45 234.56 2 850 75 7075823565 83.65178 62.56093886118189 false Random F RFHAG 2023-11-24 2023-12-03T01:06:58 495.12 2 921 62 8557914543 78.52379 58.6849882881372 false Random D KBXXS 2023-12-07 2023-12-02T22:24:58 1782.88 -2 925 46 6013180177 41.107002 34.86561026061906 true Random L XLLXY 2023-12-06 2023-12-09T14:04:58 1246.26 -20 248 64 7704906572 35.089928 76.69128821479936 true Random T KQOMS 2023-11-30 2023-12-11T01:35:58 1799.26 -22 200 41 12163439252 64.621254 81.68574929661384 true Random U KGVNU 2023-12-20 2023-11-30T14:56:58 1915.47 +2 925 46 6013180177 41.107 34.86561026061906 true Random L XLLXY 2023-12-06 2023-12-09T14:04:58 1246.26 +20 248 64 7704906572 35.08993 76.69128821479936 true Random T KQOMS 2023-11-30 2023-12-11T01:35:58 1799.26 +22 200 41 12163439252 64.62125 81.68574929661384 true Random U KGVNU 2023-12-20 2023-11-30T14:56:58 1915.47 22 235 19 6963606423 65.68033 54.1995295752517 true Random E ENVRH 2023-12-22 2023-11-29T14:42:58 864.89 -23 192 8 5102667616 54.111057 40.85713971600841 false Random J EBXEB 2023-12-13 2023-12-10T11:32:58 1824.12 -27 452 74 4240215371 50.569168 75.68204627611644 true Random G AZOWU 2023-12-01 2023-11-26T06:24:58 201.31 +23 192 8 5102667616 54.11106 40.85713971600841 false Random J EBXEB 2023-12-13 2023-12-10T11:32:58 1824.12 +27 452 74 4240215371 50.56917 75.68204627611644 true Random G AZOWU 2023-12-01 2023-11-26T06:24:58 201.31 27 866 24 5531365994 72.77447 86.96690821165853 false Random S TZPFJ 2023-11-28 2023-12-13T15:31:58 1274.75 -28 655 21 14580233860 12.503378 48.60220286874443 false Random P DUBQQ 2023-12-12 2023-12-03T20:11:58 922.42 -29 157 34 2302882987 51.924015 20.311140937696468 true Random R MBOXJ 2023-12-02 2023-12-03T14:12:58 1620.80 -29 910 52 5544039917 22.179396 46.32732226806482 true Random C TIZAG 2023-11-28 2023-12-14T16:08:58 900.96 -29 923 57 1591814253 68.57371 33.342802789892986 true Random Q ZONGC 2023-12-20 2023-12-13T09:11:58 1465.38 -3 259 74 7422478791 22.291426 75.38227773520089 true Random S VWAXJ 2023-12-01 2023-12-05T21:23:58 1970.57 -3 300 15 3000000000 20.25 30.0 true Third C Gamma 2023-10-08 2023-10-08T16:15 345.67 -3 422 25 5996825874 89.173584 62.758513798505824 false Random Z CDYAO 2023-12-14 2023-12-08T09:27:58 567.23 +28 655 21 14580233860 12.50338 48.60220286874443 false Random P DUBQQ 2023-12-12 2023-12-03T20:11:58 922.42 +29 157 34 2302882987 51.92402 20.31114093769647 true Random R MBOXJ 2023-12-02 2023-12-03T14:12:58 1620.80 +29 910 52 5544039917 22.1794 46.32732226806482 true Random C TIZAG 2023-11-28 2023-12-14T16:08:58 900.96 +29 923 57 1591814253 68.57371 33.34280278989299 true Random Q ZONGC 2023-12-20 2023-12-13T09:11:58 1465.38 +3 259 74 7422478791 22.29143 75.38227773520089 true Random S VWAXJ 2023-12-01 2023-12-05T21:23:58 1970.57 +3 300 15 3000000000 20.25 30 true Third C Gamma 2023-10-08 2023-10-08T16:15 345.67 +3 422 25 5996825874 89.17358 62.75851379850582 false Random Z CDYAO 2023-12-14 2023-12-08T09:27:58 567.23 3 668 60 1942550969 83.43451 87.15906153619602 true Random F QYSRS 2023-12-22 2023-12-10T22:17:58 320.22 -30 292 71 10308444223 63.039078 76.40649540444898 false Random G DRLHY 2023-12-19 2023-12-14T15:32:58 1165.14 -30 572 6 3022031043 57.813908 72.29244668177799 true Random X EHJDN 2023-12-11 2023-12-12T02:44:58 910.38 -30 830 65 12624057029 38.791172 59.72899174862661 false Random A LFPWP 2023-12-03 2023-12-17T00:10:58 1760.62 +30 292 71 10308444223 63.03908 76.40649540444898 false Random G DRLHY 2023-12-19 2023-12-14T15:32:58 1165.14 +30 572 6 3022031043 57.81391 72.29244668177799 true Random X EHJDN 2023-12-11 2023-12-12T02:44:58 910.38 +30 830 65 12624057029 38.79117 59.72899174862661 false Random A LFPWP 2023-12-03 2023-12-17T00:10:58 1760.62 31 395 22 6141426904 88.37914 52.0655270963123 false Random J DRPJV 2023-12-07 2023-11-29T03:15:58 1076.41 -31 990 5 13678786851 15.762894 85.24173385692956 false Random H THGIM 2023-12-14 2023-12-09T01:24:58 1834.37 +31 990 5 13678786851 15.76289 85.24173385692956 false Random H THGIM 2023-12-14 2023-12-09T01:24:58 1834.37 33 198 20 13225406950 67.7327 58.63863378877107 true Random I ZKXRA 2023-12-07 2023-11-29T02:33:58 566.76 33 321 39 12537851805 38.26871 32.6626492245712 true Random S OICCE 2023-12-02 2023-12-19T16:41:58 306.92 -33 916 53 5666674210 57.998173 61.774881852563475 true Random J WJAXA 2023-11-27 2023-12-05T19:58:58 976.13 +33 916 53 5666674210 57.99817 61.77488185256347 true Random J WJAXA 2023-11-27 2023-12-05T19:58:58 976.13 34 145 44 14060350663 73.02436 68.40544929600975 true Random S UUJFP 2023-11-23 2023-12-12T06:08:58 739.45 -34 585 43 1429300527 61.706585 80.88100239373303 false Random O JKJOH 2023-12-17 2023-12-07T11:00:58 468.11 -35 297 75 2468378214 51.353462 34.18114780065386 false Random C HBYZO 2023-12-05 2023-12-09T21:42:58 534.70 +34 585 43 1429300527 61.70658 80.88100239373303 false Random O JKJOH 2023-12-17 2023-12-07T11:00:58 468.11 +35 297 75 2468378214 51.35346 34.18114780065386 false Random C HBYZO 2023-12-05 2023-12-09T21:42:58 534.70 37 438 39 6809169396 83.56728 40.90894521029911 true Random W GXPAY 2023-12-07 2023-12-18T06:35:58 383.18 38 606 57 14585148556 82.67463 79.18300302689997 false Random E RSFUZ 2023-12-16 2023-11-27T18:55:58 970.25 -39 726 50 3865644066 26.225628 28.534393094364418 false Random F NIUCS 2023-12-05 2023-12-04T19:31:58 1953.82 +39 726 50 3865644066 26.22563 28.53439309436442 false Random F NIUCS 2023-12-05 2023-12-04T19:31:58 1953.82 4 122 24 10738473173 81.15482 60.21481394154484 false Random Y PQJRK 2023-12-20 2023-12-09T02:38:58 1467.35 4 400 20 4000000000 25.5 35.25 false Fourth D Delta 2023-10-09 2023-10-09T17:30 456.78 -4 569 72 10560903405 50.255936 47.535145739285184 false Random O NRIRC 2023-12-05 2023-12-01T09:10:58 1986.99 +4 569 72 10560903405 50.25594 47.53514573928518 false Random O NRIRC 2023-12-05 2023-12-01T09:10:58 1986.99 4 682 22 2040832636 60.33469 67.33499498711046 true Random W QUICJ 2023-11-24 2023-12-14T10:17:58 579.56 -40 230 34 10824964541 16.929768 53.812277279703366 false Random F YDQHF 2023-12-14 2023-12-03T17:42:58 1623.79 +40 230 34 10824964541 16.92977 53.81227727970337 false Random F YDQHF 2023-12-14 2023-12-03T17:42:58 1623.79 40 693 69 13276482882 44.35974 82.57845708670757 true Random B RCCSU 2023-11-29 2023-12-01T20:11:58 183.64 -40 914 7 4902128502 19.442041 33.099787387344406 true Random Q KOCWA 2023-11-28 2023-12-21T09:20:58 1824.80 -41 344 34 14536795918 56.660946 84.15108995619764 false Random Q KYLCH 2023-12-10 2023-12-04T08:25:58 1902.09 +40 914 7 4902128502 19.44204 33.09978738734441 true Random Q KOCWA 2023-11-28 2023-12-21T09:20:58 1824.80 +41 344 34 14536795918 56.66095 84.15108995619764 false Random Q KYLCH 2023-12-10 2023-12-04T08:25:58 1902.09 41 599 54 8095449906 22.58196 37.99742597458578 false Random T GTQXP 2023-12-12 2023-12-22T19:08:58 743.46 -41 697 21 1200243566 12.466168 68.57243624557165 true Random U JZGEG 2023-12-03 2023-12-10T04:51:58 1323.88 +41 697 21 1200243566 12.46617 68.57243624557165 true Random U JZGEG 2023-12-03 2023-12-10T04:51:58 1323.88 41 708 64 11745827370 72.84812 35.31028363777645 true Random O WGSQC 2023-12-02 2023-11-25T17:07:58 1666.71 -41 840 65 8988241658 37.428593 42.25992474748068 false Random E HURYX 2023-12-22 2023-12-19T01:55:58 141.89 +41 840 65 8988241658 37.42859 42.25992474748068 false Random E HURYX 2023-12-22 2023-12-19T01:55:58 141.89 42 143 42 3421815721 65.27691 87.91368867538209 true Random S AXGVL 2023-12-06 2023-11-29T07:36:58 575.01 42 178 38 7559404453 69.69449 64.37154501388798 true Random G QUMUN 2023-12-14 2023-12-17T01:37:58 1190.44 -42 192 28 14454791024 35.465202 46.34876515635648 false Random W NQFGR 2023-12-04 2023-11-24T05:02:58 1428.02 +42 192 28 14454791024 35.4652 46.34876515635648 false Random W NQFGR 2023-12-04 2023-11-24T05:02:58 1428.02 42 355 72 11536856285 74.42886 53.49032479461299 false Random I IQZEI 2023-12-10 2023-12-06T07:17:58 1098.14 -43 178 64 6969956763 40.980415 52.998828731408516 true Random C XQHYB 2023-12-11 2023-12-07T23:00:58 257.08 +43 178 64 6969956763 40.98042 52.99882873140852 true Random C XQHYB 2023-12-11 2023-12-07T23:00:58 257.08 43 828 24 12011396947 45.07647 54.2136449479346 true Random E HIDUO 2023-12-02 2023-12-19T01:14:58 233.10 44 219 38 8596488294 73.52956 94.10797854680568 true Random E HMWBI 2023-12-15 2023-12-06T00:51:58 1907.47 -44 694 55 3626514138 62.504086 72.89799265418553 true Random Z JTDVF 2023-12-01 2023-11-29T12:08:58 1769.92 -44 912 63 8534761366 55.993538 50.235171557550416 false Random N OVQRQ 2023-12-08 2023-11-24T03:39:58 264.92 -44 928 7 1939079012 14.426672 68.86451571230457 false Random I EKVWY 2023-12-15 2023-12-09T10:43:58 846.74 -45 455 25 12639246000 47.011307 26.310712594958694 false Random Z GGEUA 2023-11-27 2023-12-01T20:41:58 1698.21 -45 492 43 3870916386 51.069588 42.652270406300794 true Random H JVZTB 2023-12-04 2023-12-09T21:06:58 1517.83 -47 508 48 1456473942 48.488297 20.377955902326608 false Random B CAOEY 2023-11-29 2023-12-10T14:49:58 1865.52 -47 566 50 1426586688 51.278687 40.47151456873397 true Random F YBOSH 2023-11-26 2023-12-15T03:44:58 1806.35 +44 694 55 3626514138 62.50409 72.89799265418553 true Random Z JTDVF 2023-12-01 2023-11-29T12:08:58 1769.92 +44 912 63 8534761366 55.99354 50.23517155755042 false Random N OVQRQ 2023-12-08 2023-11-24T03:39:58 264.92 +44 928 7 1939079012 14.42667 68.86451571230457 false Random I EKVWY 2023-12-15 2023-12-09T10:43:58 846.74 +45 455 25 12639246000 47.01131 26.31071259495869 false Random Z GGEUA 2023-11-27 2023-12-01T20:41:58 1698.21 +45 492 43 3870916386 51.06959 42.65227040630079 true Random H JVZTB 2023-12-04 2023-12-09T21:06:58 1517.83 +47 508 48 1456473942 48.4883 20.37795590232661 false Random B CAOEY 2023-11-29 2023-12-10T14:49:58 1865.52 +47 566 50 1426586688 51.27869 40.47151456873397 true Random F YBOSH 2023-11-26 2023-12-15T03:44:58 1806.35 47 838 73 14910230294 83.69784 82.28901816600579 true Random L SHXYL 2023-11-24 2023-12-05T22:19:58 1062.15 48 898 59 12871187130 10.13838 70.19705104611333 true Random J WFXNN 2023-12-23 2023-12-17T02:53:58 1050.21 -49 165 38 4482178563 34.706547 69.17129468406594 false Random W CPZNY 2023-12-15 2023-11-23T19:56:58 512.60 -49 412 16 8300982793 56.263252 66.07893608061771 false Random K DWWJI 2023-12-08 2023-12-17T11:32:58 1718.54 +49 165 38 4482178563 34.70655 69.17129468406594 false Random W CPZNY 2023-12-15 2023-11-23T19:56:58 512.60 +49 412 16 8300982793 56.26325 66.07893608061771 false Random K DWWJI 2023-12-08 2023-12-17T11:32:58 1718.54 49 511 51 8602055259 88.1686 88.98712207285577 false Random M ZDKEY 2023-12-10 2023-11-25T02:44:58 241.08 -49 568 70 2916596630 79.16303 56.114316916863025 false Random T ILLIU 2023-11-23 2023-12-07T11:05:58 1039.03 +49 568 70 2916596630 79.16303 56.11431691686303 false Random T ILLIU 2023-11-23 2023-12-07T11:05:58 1039.03 5 500 25 5000000000 30.75 40.5 true Fifth E Epsilon 2023-10-10 2023-10-10T18:45 567.89 -5 768 5 4152322228 41.128906 78.60686390712706 false Random J LXKRA 2023-12-05 2023-11-24T18:13:58 1941.98 -5 823 63 13328808917 77.768196 22.87975226738422 false Random F OIYPV 2023-12-11 2023-12-14T06:43:58 1144.38 -5 887 74 4082758600 22.797577 93.28246034891224 false Random V MPPGX 2023-12-01 2023-11-29T01:53:58 510.50 +5 768 5 4152322228 41.12891 78.60686390712706 false Random J LXKRA 2023-12-05 2023-11-24T18:13:58 1941.98 +5 823 63 13328808917 77.7682 22.87975226738422 false Random F OIYPV 2023-12-11 2023-12-14T06:43:58 1144.38 +5 887 74 4082758600 22.79758 93.28246034891224 false Random V MPPGX 2023-12-01 2023-11-29T01:53:58 510.50 50 126 58 4433111715 75.31828 43.28056186824247 false Random H UTDJF 2023-12-19 2023-12-10T08:24:58 368.42 51 778 59 13914307584 27.48499 91.47665081887983 true Random X FGFHK 2023-12-01 2023-12-10T03:24:58 402.63 -51 898 32 13510411411 18.679659 21.406761033351007 false Random L FECUW 2023-12-10 2023-12-14T02:00:58 700.43 -52 811 31 14085958816 51.067017 65.01991893789116 true Random A CODYQ 2023-12-03 2023-12-07T23:25:58 1797.21 +51 898 32 13510411411 18.67966 21.40676103335101 false Random L FECUW 2023-12-10 2023-12-14T02:00:58 700.43 +52 811 31 14085958816 51.06702 65.01991893789116 true Random A CODYQ 2023-12-03 2023-12-07T23:25:58 1797.21 53 505 52 9862728376 58.40501 57.60544454281924 false Random V WYCTZ 2023-11-24 2023-12-20T05:13:58 210.43 53 667 49 10531976747 50.22229 49.64660893042742 false Random K WNRJE 2023-12-04 2023-12-19T14:57:58 680.97 -53 713 14 1464447148 23.474258 45.35056918414047 false Random Q UHMLT 2023-12-10 2023-11-30T02:07:58 286.70 +53 713 14 1464447148 23.47426 45.35056918414047 false Random Q UHMLT 2023-12-10 2023-11-30T02:07:58 286.70 53 715 29 10917905565 41.83069 93.50885201221966 true Random U TRLSY 2023-12-03 2023-11-26T15:13:58 369.72 -54 467 42 13684826428 38.491455 90.10566649802195 true Random M ERFBG 2023-11-24 2023-12-02T16:23:58 211.00 -54 827 55 7054839267 58.555687 25.891004802115663 false Random O ASMLW 2023-12-13 2023-12-20T16:41:58 1369.32 -54 843 34 9547939940 38.66475 36.370944299232434 true Random P NTVIR 2023-12-12 2023-12-02T06:45:58 1628.37 +54 467 42 13684826428 38.49146 90.10566649802195 true Random M ERFBG 2023-11-24 2023-12-02T16:23:58 211.00 +54 827 55 7054839267 58.55569 25.89100480211566 false Random O ASMLW 2023-12-13 2023-12-20T16:41:58 1369.32 +54 843 34 9547939940 38.66475 36.37094429923243 true Random P NTVIR 2023-12-12 2023-12-02T06:45:58 1628.37 55 908 24 13623721787 40.06427 90.85281792731746 false Random B KFZGI 2023-11-27 2023-12-23T18:06:58 1124.95 -55 964 8 14038541765 70.24135 20.034551391620194 false Random J AYXIT 2023-12-13 2023-12-16T19:38:58 1476.73 -57 936 26 12164628867 56.541275 56.276679149397076 true Random O IPHPZ 2023-12-13 2023-11-30T22:36:58 603.68 -59 144 31 6208909394 67.417076 40.59765633709834 true Random D FLWNA 2023-12-12 2023-12-19T06:17:58 1870.24 +55 964 8 14038541765 70.24135 20.03455139162019 false Random J AYXIT 2023-12-13 2023-12-16T19:38:58 1476.73 +57 936 26 12164628867 56.54128 56.27667914939708 true Random O IPHPZ 2023-12-13 2023-11-30T22:36:58 603.68 +59 144 31 6208909394 67.41708 40.59765633709834 true Random D FLWNA 2023-12-12 2023-12-19T06:17:58 1870.24 59 509 50 5501336408 39.94401 73.35770882761237 true Random I PVZNO 2023-12-04 2023-11-27T04:40:58 1177.33 6 600 30 6000000000 35.25 45.75 false Sixth F Zeta 2023-10-11 2023-10-11T19:15 678.90 -60 711 69 1493870104 22.574188 61.30347648465907 false Random E FHKVR 2023-11-27 2023-12-05T11:26:58 1981.61 -60 875 42 14283877167 48.811504 67.0706975606688 true Random P VJOZH 2023-12-06 2023-12-15T05:20:58 781.71 -61 267 61 11407448558 12.877184 42.144845857251944 true Random B NRWNW 2023-11-30 2023-11-25T09:34:58 859.85 -61 414 63 14506877706 12.540966 58.04557426323987 false Random H NUOAD 2023-12-10 2023-12-06T22:52:58 780.50 -62 451 50 12304139502 51.151623 22.46754141558852 false Random C SRRSV 2023-12-08 2023-12-20T02:48:58 1352.65 -62 793 46 7308804595 39.766644 48.88672198076526 true Random V TPENZ 2023-11-26 2023-12-23T17:51:58 388.46 +60 711 69 1493870104 22.57419 61.30347648465907 false Random E FHKVR 2023-11-27 2023-12-05T11:26:58 1981.61 +60 875 42 14283877167 48.8115 67.0706975606688 true Random P VJOZH 2023-12-06 2023-12-15T05:20:58 781.71 +61 267 61 11407448558 12.87718 42.14484585725194 true Random B NRWNW 2023-11-30 2023-11-25T09:34:58 859.85 +61 414 63 14506877706 12.54097 58.04557426323987 false Random H NUOAD 2023-12-10 2023-12-06T22:52:58 780.50 +62 451 50 12304139502 51.15162 22.46754141558852 false Random C SRRSV 2023-12-08 2023-12-20T02:48:58 1352.65 +62 793 46 7308804595 39.76664 48.88672198076526 true Random V TPENZ 2023-11-26 2023-12-23T17:51:58 388.46 63 112 75 12197306353 85.90137 43.48931389222043 false Random C KKAIT 2023-11-27 2023-12-23T04:23:58 1954.90 -63 383 35 5161212745 39.455276 52.33267523851794 false Random X TMYMC 2023-11-29 2023-12-10T09:09:58 1442.54 -63 410 33 1767102777 72.260124 56.971483381024896 false Random B QXNSM 2023-12-12 2023-12-19T22:57:58 1660.73 -64 479 20 1710421528 53.324104 33.55443503561635 false Random Q ONZRK 2023-12-09 2023-12-01T22:29:58 252.13 +63 383 35 5161212745 39.45528 52.33267523851794 false Random X TMYMC 2023-11-29 2023-12-10T09:09:58 1442.54 +63 410 33 1767102777 72.26012 56.9714833810249 false Random B QXNSM 2023-12-12 2023-12-19T22:57:58 1660.73 +64 479 20 1710421528 53.3241 33.55443503561635 false Random Q ONZRK 2023-12-09 2023-12-01T22:29:58 252.13 64 678 14 13681447851 74.83621 36.94143092647816 true Random J KELFB 2023-12-01 2023-12-07T18:14:58 308.26 -64 719 36 1224510454 64.237434 86.05689694804887 true Random E ZVQPU 2023-11-30 2023-12-03T04:56:58 1879.25 -64 822 26 1154241961 52.165447 26.779469377773403 true Random E YWNAD 2023-12-08 2023-12-19T19:08:58 731.15 -65 571 24 10523050555 45.865078 70.80680527390149 true Random Y DILBW 2023-12-17 2023-11-25T22:41:58 859.30 -66 306 5 14448160602 44.642223 50.24249889525751 false Random X OASEB 2023-12-11 2023-11-27T00:16:58 1345.69 -66 521 30 7757576974 69.440155 92.3562810104632 false Random H SSOCR 2023-12-19 2023-11-30T06:51:58 913.34 -67 484 65 10817432713 62.168163 77.02869166077757 true Random K SAJMG 2023-12-19 2023-12-14T19:47:58 488.01 -68 266 31 8183454755 69.19586 23.139304803938643 false Random S STCBM 2023-11-26 2023-12-22T13:42:58 1722.37 -68 554 33 3525526216 29.078024 29.6567390059356 false Random Y EUGOF 2023-11-23 2023-12-15T10:33:58 395.41 -68 591 60 4813122821 33.210274 54.464145718507616 false Random X EXROI 2023-12-07 2023-12-07T00:39:58 290.11 +64 719 36 1224510454 64.23743 86.05689694804887 true Random E ZVQPU 2023-11-30 2023-12-03T04:56:58 1879.25 +64 822 26 1154241961 52.16545 26.7794693777734 true Random E YWNAD 2023-12-08 2023-12-19T19:08:58 731.15 +65 571 24 10523050555 45.86508 70.80680527390149 true Random Y DILBW 2023-12-17 2023-11-25T22:41:58 859.30 +66 306 5 14448160602 44.64222 50.24249889525751 false Random X OASEB 2023-12-11 2023-11-27T00:16:58 1345.69 +66 521 30 7757576974 69.44016 92.35628101046321 false Random H SSOCR 2023-12-19 2023-11-30T06:51:58 913.34 +67 484 65 10817432713 62.16816 77.02869166077757 true Random K SAJMG 2023-12-19 2023-12-14T19:47:58 488.01 +68 266 31 8183454755 69.19586 23.13930480393864 false Random S STCBM 2023-11-26 2023-12-22T13:42:58 1722.37 +68 554 33 3525526216 29.07802 29.6567390059356 false Random Y EUGOF 2023-11-23 2023-12-15T10:33:58 395.41 +68 591 60 4813122821 33.21027 54.46414571850762 false Random X EXROI 2023-12-07 2023-12-07T00:39:58 290.11 68 756 63 5416393421 66.41538 76.32820339134415 false Random Y CUNAL 2023-12-23 2023-12-14T22:49:58 1109.25 -68 922 13 11664232196 72.683266 37.9910331525765 false Random W PPWBB 2023-11-26 2023-12-10T22:54:58 1968.89 -68 947 60 7257499958 45.661217 77.42577781358565 false Random F ENQGA 2023-11-24 2023-11-29T07:33:58 319.99 -69 416 14 7702410607 31.638903 89.5793904314531 true Random C URQMU 2023-11-25 2023-11-30T15:17:58 1379.22 +68 922 13 11664232196 72.68327 37.9910331525765 false Random W PPWBB 2023-11-26 2023-12-10T22:54:58 1968.89 +68 947 60 7257499958 45.66122 77.42577781358565 false Random F ENQGA 2023-11-24 2023-11-29T07:33:58 319.99 +69 416 14 7702410607 31.6389 89.57939043145311 true Random C URQMU 2023-11-25 2023-11-30T15:17:58 1379.22 7 340 50 8934567449 83.79683 35.39446967734915 false Random L CWYFN 2023-12-05 2023-12-23T02:26:58 806.15 -7 700 35 7000000000 40.5 50.0 true Seventh G Eta 2023-10-12 2023-10-12T20:30 789.01 +7 700 35 7000000000 40.5 50 true Seventh G Eta 2023-10-12 2023-10-12T20:30 789.01 7 969 62 3451343234 57.17074 56.74513811095188 false Random G OWDSC 2023-12-19 2023-12-11T17:17:58 1874.22 -70 231 67 4547989149 35.103123 51.93622592177748 true Random V ZBCVY 2023-11-29 2023-12-22T11:41:58 1749.60 -70 421 23 3153379289 27.412096 79.32006404438445 false Random L VLJWK 2023-12-04 2023-12-12T05:31:58 1163.35 +70 231 67 4547989149 35.10312 51.93622592177748 true Random V ZBCVY 2023-11-29 2023-12-22T11:41:58 1749.60 +70 421 23 3153379289 27.4121 79.32006404438445 false Random L VLJWK 2023-12-04 2023-12-12T05:31:58 1163.35 70 751 56 7828222634 52.8313 55.7263634552559 true Random B TFHMH 2023-11-30 2023-12-24T12:22:58 1166.13 -71 452 25 4464808420 18.155642 61.988641984596185 false Random K YXFVY 2023-12-15 2023-12-08T04:58:58 514.74 -71 594 26 1024634104 62.92234 37.216752731371386 true Random J SPUWU 2023-12-04 2023-12-23T08:50:58 779.97 -72 377 11 3042707243 55.289066 53.72552524152444 true Random Q BAPHV 2023-12-06 2023-11-30T07:14:58 119.39 -73 866 49 4618070115 46.803646 91.41305051885227 true Random H ROYYF 2023-12-07 2023-12-01T10:28:58 1817.67 -74 670 60 4783926122 23.513939 91.24357097091087 true Random Y YFPMC 2023-12-23 2023-12-22T22:29:58 943.62 -75 368 73 6944888766 31.500992 56.88267149430107 false Random H LEXKZ 2023-12-21 2023-12-14T01:12:58 443.91 +71 452 25 4464808420 18.15564 61.98864198459619 false Random K YXFVY 2023-12-15 2023-12-08T04:58:58 514.74 +71 594 26 1024634104 62.92234 37.21675273137139 true Random J SPUWU 2023-12-04 2023-12-23T08:50:58 779.97 +72 377 11 3042707243 55.28907 53.72552524152444 true Random Q BAPHV 2023-12-06 2023-11-30T07:14:58 119.39 +73 866 49 4618070115 46.80365 91.41305051885227 true Random H ROYYF 2023-12-07 2023-12-01T10:28:58 1817.67 +74 670 60 4783926122 23.51394 91.24357097091087 true Random Y YFPMC 2023-12-23 2023-12-22T22:29:58 943.62 +75 368 73 6944888766 31.50099 56.88267149430107 false Random H LEXKZ 2023-12-21 2023-12-14T01:12:58 443.91 76 410 20 10425110604 66.26356 92.68329033006493 false Random L JHFYD 2023-11-23 2023-11-29T10:34:58 867.56 -76 504 70 14161652666 58.071503 67.99111956708262 true Random Y HAVCK 2023-11-27 2023-12-14T16:08:58 1864.98 +76 504 70 14161652666 58.0715 67.99111956708262 true Random Y HAVCK 2023-11-27 2023-12-14T16:08:58 1864.98 77 131 19 2964167114 33.23181 53.35246738882714 false Random G AHGFO 2023-12-19 2023-12-01T10:11:58 1837.90 -77 165 36 12887722637 19.729382 45.61157603163882 true Random S OZOLB 2023-12-02 2023-12-03T05:07:58 1576.79 -79 314 17 6823498005 22.562634 72.70049796639023 true Random K FPSNZ 2023-12-07 2023-12-15T11:52:58 211.50 +77 165 36 12887722637 19.72938 45.61157603163882 true Random S OZOLB 2023-12-02 2023-12-03T05:07:58 1576.79 +79 314 17 6823498005 22.56263 72.70049796639023 true Random K FPSNZ 2023-12-07 2023-12-15T11:52:58 211.50 8 550 48 13655992126 52.90345 51.35114230137935 false Random X JTVSE 2023-12-13 2023-12-15T03:49:58 361.55 8 800 40 8000000000 45.75 55.25 false Eighth H Theta 2023-10-13 2023-10-13T21:45 890.12 8 866 37 13672147880 81.28999 67.66548594336737 false Random H QDJIM 2023-12-14 2023-12-17T18:44:58 1112.05 -80 267 57 8797946135 35.604717 80.51381110359165 false Random K KQTEX 2023-12-09 2023-12-13T06:19:58 1769.15 -80 815 19 14529289205 19.769405 37.37008094684765 true Random Z WLALH 2023-12-11 2023-12-14T03:24:58 479.38 +80 267 57 8797946135 35.60472 80.51381110359165 false Random K KQTEX 2023-12-09 2023-12-13T06:19:58 1769.15 +80 815 19 14529289205 19.76941 37.37008094684765 true Random Z WLALH 2023-12-11 2023-12-14T03:24:58 479.38 81 726 66 9327218218 81.50363 39.9702863173827 true Random X WODRP 2023-11-28 2023-12-23T13:25:58 561.98 82 107 51 1358006007 78.36581 46.09413324325159 true Random C IPNQU 2023-12-01 2023-12-14T05:41:58 417.17 82 133 60 4616538638 88.8813 30.82745983013354 true Random W KPIJE 2023-12-20 2023-12-01T07:57:58 583.41 -82 531 44 10642962933 26.818586 23.851865471979615 false Random F NMQOD 2023-12-13 2023-12-18T19:34:58 861.78 +82 531 44 10642962933 26.81859 23.85186547197961 false Random F NMQOD 2023-12-13 2023-12-18T19:34:58 861.78 82 603 60 9083469993 81.24088 44.46228092092543 true Random Y WTQGU 2023-11-30 2023-11-28T13:18:58 1448.45 82 982 62 8955063933 81.2855 78.30439669511465 true Random J SOCOT 2023-12-02 2023-12-02T21:17:58 814.60 -83 700 46 4569093424 50.063602 47.75811273142146 false Random R TEGAY 2023-12-19 2023-12-07T06:46:58 760.22 -84 427 60 9035762847 81.971306 28.37315065501099 true Random L FETYF 2023-12-01 2023-11-24T15:00:58 1267.12 +83 700 46 4569093424 50.0636 47.75811273142146 false Random R TEGAY 2023-12-19 2023-12-07T06:46:58 760.22 +84 427 60 9035762847 81.97131 28.37315065501099 true Random L FETYF 2023-12-01 2023-11-24T15:00:58 1267.12 85 375 63 6797318130 85.47522 58.16330728665678 true Random E UNZLS 2023-12-01 2023-12-04T05:17:58 1949.48 -85 845 42 2373712244 74.551315 79.15491248184088 false Random B QJRKO 2023-11-29 2023-12-04T09:20:58 317.17 -85 873 18 7233488476 33.83051 31.655950581225508 false Random N RJTIB 2023-11-23 2023-12-11T15:07:58 1249.52 -86 398 27 13222936963 20.387327 44.51255195842424 true Random T ZCRFI 2023-12-21 2023-12-23T12:04:58 1801.53 +85 845 42 2373712244 74.55132 79.15491248184088 false Random B QJRKO 2023-11-29 2023-12-04T09:20:58 317.17 +85 873 18 7233488476 33.83051 31.65595058122551 false Random N RJTIB 2023-11-23 2023-12-11T15:07:58 1249.52 +86 398 27 13222936963 20.38733 44.51255195842424 true Random T ZCRFI 2023-12-21 2023-12-23T12:04:58 1801.53 86 662 53 8875065706 28.64778 30.6775849729486 false Random N YNQAY 2023-12-15 2023-11-24T21:56:58 1108.35 -86 728 18 13390353484 61.060482 87.44751616093882 false Random J BUCVI 2023-12-07 2023-12-14T23:00:58 1611.17 -86 998 74 11080891106 82.568756 32.0122101203062 true Random K VAAMT 2023-12-23 2023-12-01T10:14:58 1708.39 +86 728 18 13390353484 61.06048 87.44751616093882 false Random J BUCVI 2023-12-07 2023-12-14T23:00:58 1611.17 +86 998 74 11080891106 82.56876 32.0122101203062 true Random K VAAMT 2023-12-23 2023-12-01T10:14:58 1708.39 87 145 64 9022533179 37.80205 63.26081178595084 true Random T PEOPK 2023-12-08 2023-12-07T17:41:58 1167.05 -87 641 64 4786767059 14.765089 70.8793353664754 false Random W SQHGN 2023-12-12 2023-12-24T01:19:58 1316.61 -88 274 41 14108849690 73.74919 42.625751442467404 true Random X BVRFA 2023-12-01 2023-11-25T14:32:58 515.18 -88 728 59 8439434199 30.372904 59.410283344764366 false Random F JODWY 2023-12-04 2023-12-01T07:57:58 1753.88 +87 641 64 4786767059 14.76509 70.8793353664754 false Random W SQHGN 2023-12-12 2023-12-24T01:19:58 1316.61 +88 274 41 14108849690 73.74919 42.6257514424674 true Random X BVRFA 2023-12-01 2023-11-25T14:32:58 515.18 +88 728 59 8439434199 30.3729 59.41028334476437 false Random F JODWY 2023-12-04 2023-12-01T07:57:58 1753.88 88 765 69 9753682777 83.42646 25.99260711248508 true Random M MEJAX 2023-11-25 2023-12-20T09:21:58 1647.22 -89 129 64 6400162051 67.910965 80.48074661432221 true Random Y ZXJWQ 2023-12-16 2023-12-19T10:23:58 1882.65 -89 377 22 14340881803 32.61157 82.5503801214006 false Random K ACYZU 2023-12-01 2023-11-27T02:05:58 672.13 -89 964 41 12706120446 69.484116 32.39048200771184 true Random J IIRNY 2023-12-16 2023-11-29T01:54:58 1298.71 -9 113 7 6162580854 11.346889 46.82839094332704 false Random A SJTAF 2023-12-14 2023-11-23T18:27:58 1610.49 +89 129 64 6400162051 67.91096 80.48074661432221 true Random Y ZXJWQ 2023-12-16 2023-12-19T10:23:58 1882.65 +89 377 22 14340881803 32.61157 82.55038012140059 false Random K ACYZU 2023-12-01 2023-11-27T02:05:58 672.13 +89 964 41 12706120446 69.48412 32.39048200771184 true Random J IIRNY 2023-12-16 2023-11-29T01:54:58 1298.71 +9 113 7 6162580854 11.34689 46.82839094332704 false Random A SJTAF 2023-12-14 2023-11-23T18:27:58 1610.49 9 268 59 8149280252 86.66627 70.91298799618343 false Random E PVKYK 2023-12-21 2023-11-25T00:28:58 263.17 9 900 45 9000000000 50.0 60.5 true Ninth I Iota 2023-10-14 2023-10-14T22:15 901.23 9 907 24 6113036809 66.06377 50.26485838775805 true Random X XLPOL 2023-11-23 2023-12-02T09:03:58 256.61 90 391 26 12874761259 21.49042 53.46850617467312 true Random Q QTJPE 2023-12-17 2023-12-03T17:40:58 748.05 -91 389 11 14784237986 11.174142 27.692284427565397 true Random P DYILB 2023-12-14 2023-12-21T11:07:58 1175.73 +91 389 11 14784237986 11.17414 27.6922844275654 true Random P DYILB 2023-12-14 2023-12-21T11:07:58 1175.73 91 528 68 14588592231 77.4651 88.92064181463138 false Random U JXZUA 2023-12-16 2023-12-21T02:28:58 1834.07 -92 344 29 5182139341 31.653255 44.26814517218887 true Random F NGHOS 2023-12-06 2023-12-09T21:25:58 1291.06 -93 887 20 13555948969 70.57364 32.621532934876804 false Random D SPMEK 2023-11-26 2023-12-20T18:11:58 258.86 -94 216 49 8773264156 81.617195 43.03983700523827 true Random D VHWYT 2023-12-13 2023-11-30T07:03:58 1178.27 -94 693 60 4818659234 26.04229 83.2975107272106 true Random B ENSQO 2023-12-22 2023-12-12T06:08:58 1283.81 -95 560 62 1389447643 19.202044 85.46518830161321 true Random S LQRRB 2023-12-16 2023-12-12T06:12:58 445.65 -96 595 72 11506136303 21.917727 74.74561804277158 true Random T SPLKA 2023-12-02 2023-11-30T00:39:58 1693.61 -96 637 39 5516035994 55.90832 60.522041012562816 true Random O YPETL 2023-12-02 2023-11-28T02:47:58 1175.16 -97 415 74 10346322649 21.667427 46.58901867647463 false Random R KWFOF 2023-12-21 2023-11-27T12:18:58 1157.72 +92 344 29 5182139341 31.65326 44.26814517218887 true Random F NGHOS 2023-12-06 2023-12-09T21:25:58 1291.06 +93 887 20 13555948969 70.57364 32.6215329348768 false Random D SPMEK 2023-11-26 2023-12-20T18:11:58 258.86 +94 216 49 8773264156 81.6172 43.03983700523827 true Random D VHWYT 2023-12-13 2023-11-30T07:03:58 1178.27 +94 693 60 4818659234 26.04229 83.29751072721059 true Random B ENSQO 2023-12-22 2023-12-12T06:08:58 1283.81 +95 560 62 1389447643 19.20204 85.46518830161321 true Random S LQRRB 2023-12-16 2023-12-12T06:12:58 445.65 +96 595 72 11506136303 21.91773 74.74561804277158 true Random T SPLKA 2023-12-02 2023-11-30T00:39:58 1693.61 +96 637 39 5516035994 55.90832 60.52204101256282 true Random O YPETL 2023-12-02 2023-11-28T02:47:58 1175.16 +97 415 74 10346322649 21.66743 46.58901867647463 false Random R KWFOF 2023-12-21 2023-11-27T12:18:58 1157.72 97 839 60 14818779777 46.17389 68.98285340004992 false Random W HMFPU 2023-12-01 2023-12-04T08:41:58 1683.48 -98 228 65 4782017237 55.10206 31.414570993700565 true Random P EOIFT 2023-12-07 2023-12-15T08:12:58 137.49 -99 632 39 8911195323 74.581276 78.2764804276292 false Random Q WTQCL 2023-12-02 2023-12-05T09:18:58 200.21 +98 228 65 4782017237 55.10206 31.41457099370056 true Random P EOIFT 2023-12-07 2023-12-15T08:12:58 137.49 +99 632 39 8911195323 74.58128 78.2764804276292 false Random Q WTQCL 2023-12-02 2023-12-05T09:18:58 200.21 -- !q44 -- -17 289 49 13560709243 39.952793 38.245306832599425 true Random Q QEYVY 2023-12-19 2023-12-07T00:35:58 500.19 -17 499 46 11230409207 51.632103 28.811164197154774 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 +17 289 49 13560709243 39.95279 38.24530683259943 true Random Q QEYVY 2023-12-19 2023-12-07T00:35:58 500.19 +17 499 46 11230409207 51.6321 28.81116419715477 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 17 646 62 11234805830 76.40492 67.46425239009778 true Random N REHZC 2023-12-09 2023-11-28T02:06:58 365.15 17 698 55 1807368797 20.17171 43.84496606184709 true Random P SHSJV 2023-12-01 2023-11-25T11:56:58 810.95 17 794 14 8377523030 28.07663 52.3837762020057 false Random E WPMIN 2023-12-03 2023-11-26T04:59:58 239.42 17 913 32 4647929554 78.91502 70.54487265463735 true Random S WFPNS 2023-11-27 2023-11-26T03:29:58 321.45 -- !q45 -- -11 1100 55 11000000000 60.5 70.0 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 -11 487 27 14556302216 85.33334 62.596750833474495 true Random E QMHJD 2023-12-23 2023-12-24T08:30:58 1491.22 +11 1100 55 11000000000 60.5 70 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 +11 487 27 14556302216 85.33334 62.59675083347449 true Random E QMHJD 2023-12-23 2023-12-24T08:30:58 1491.22 12 1200 60 12000000000 65.75 75.25 false Twelfth L Mu 2023-10-17 2023-10-17T02:15 1234.56 -12 751 8 12205294947 23.468674 64.35048302450815 true Random K FCSBV 2023-12-03 2023-12-17T01:10:58 325.26 +12 751 8 12205294947 23.46867 64.35048302450815 true Random K FCSBV 2023-12-03 2023-12-17T01:10:58 325.26 13 1300 65 13000000000 70.0 80.5 true Thirteenth M Nu 2023-10-18 2023-10-18T03:30 1345.67 -13 335 39 13869202091 30.426075 39.02304533093442 true Random L AULCC 2023-12-08 2023-12-13T00:26:58 387.97 +13 335 39 13869202091 30.42607 39.02304533093442 true Random L AULCC 2023-12-08 2023-12-13T00:26:58 387.97 13 402 30 10851194313 74.82481 74.90108005771035 false Random F GEMMK 2023-11-27 2023-12-21T15:03:58 1643.55 -13 745 48 13047949175 51.168613 85.21972389262197 true Random A AYBWQ 2023-12-22 2023-12-22T16:25:58 1192.48 +13 745 48 13047949175 51.16861 85.21972389262197 true Random A AYBWQ 2023-12-22 2023-12-22T16:25:58 1192.48 14 1400 70 14000000000 75.25 85.75 false Fourteenth N Xi 2023-10-19 2023-10-19T04:45 1456.78 -14 968 16 11314514196 62.509666 33.1841427251225 false Random T WDEVJ 2023-11-24 2023-12-06T17:54:58 431.61 -15 1500 75 15000000000 80.5 90.0 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 +14 968 16 11314514196 62.50967 33.1841427251225 false Random T WDEVJ 2023-11-24 2023-12-06T17:54:58 431.61 +15 1500 75 15000000000 80.5 90 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 16 615 20 12294128025 77.37379 20.42772029677839 true Random U JHPOB 2023-11-30 2023-12-16T14:29:58 1105.33 -17 289 49 13560709243 39.952793 38.245306832599425 true Random Q QEYVY 2023-12-19 2023-12-07T00:35:58 500.19 -17 499 46 11230409207 51.632103 28.811164197154774 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 +17 289 49 13560709243 39.95279 38.24530683259943 true Random Q QEYVY 2023-12-19 2023-12-07T00:35:58 500.19 +17 499 46 11230409207 51.6321 28.81116419715477 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 17 646 62 11234805830 76.40492 67.46425239009778 true Random N REHZC 2023-12-09 2023-11-28T02:06:58 365.15 -18 835 17 14265814864 18.923101 80.53531451138412 true Random V PIKUZ 2023-12-20 2023-12-21T07:39:58 1167.09 -22 200 41 12163439252 64.621254 81.68574929661384 true Random U KGVNU 2023-12-20 2023-11-30T14:56:58 1915.47 -28 655 21 14580233860 12.503378 48.60220286874443 false Random P DUBQQ 2023-12-12 2023-12-03T20:11:58 922.42 -30 830 65 12624057029 38.791172 59.72899174862661 false Random A LFPWP 2023-12-03 2023-12-17T00:10:58 1760.62 -31 990 5 13678786851 15.762894 85.24173385692956 false Random H THGIM 2023-12-14 2023-12-09T01:24:58 1834.37 +18 835 17 14265814864 18.9231 80.53531451138412 true Random V PIKUZ 2023-12-20 2023-12-21T07:39:58 1167.09 +22 200 41 12163439252 64.62125 81.68574929661384 true Random U KGVNU 2023-12-20 2023-11-30T14:56:58 1915.47 +28 655 21 14580233860 12.50338 48.60220286874443 false Random P DUBQQ 2023-12-12 2023-12-03T20:11:58 922.42 +30 830 65 12624057029 38.79117 59.72899174862661 false Random A LFPWP 2023-12-03 2023-12-17T00:10:58 1760.62 +31 990 5 13678786851 15.76289 85.24173385692956 false Random H THGIM 2023-12-14 2023-12-09T01:24:58 1834.37 33 198 20 13225406950 67.7327 58.63863378877107 true Random I ZKXRA 2023-12-07 2023-11-29T02:33:58 566.76 33 321 39 12537851805 38.26871 32.6626492245712 true Random S OICCE 2023-12-02 2023-12-19T16:41:58 306.92 34 145 44 14060350663 73.02436 68.40544929600975 true Random S UUJFP 2023-11-23 2023-12-12T06:08:58 739.45 38 606 57 14585148556 82.67463 79.18300302689997 false Random E RSFUZ 2023-12-16 2023-11-27T18:55:58 970.25 4 122 24 10738473173 81.15482 60.21481394154484 false Random Y PQJRK 2023-12-20 2023-12-09T02:38:58 1467.35 -40 230 34 10824964541 16.929768 53.812277279703366 false Random F YDQHF 2023-12-14 2023-12-03T17:42:58 1623.79 +40 230 34 10824964541 16.92977 53.81227727970337 false Random F YDQHF 2023-12-14 2023-12-03T17:42:58 1623.79 40 693 69 13276482882 44.35974 82.57845708670757 true Random B RCCSU 2023-11-29 2023-12-01T20:11:58 183.64 -41 344 34 14536795918 56.660946 84.15108995619764 false Random Q KYLCH 2023-12-10 2023-12-04T08:25:58 1902.09 +41 344 34 14536795918 56.66095 84.15108995619764 false Random Q KYLCH 2023-12-10 2023-12-04T08:25:58 1902.09 41 708 64 11745827370 72.84812 35.31028363777645 true Random O WGSQC 2023-12-02 2023-11-25T17:07:58 1666.71 -42 192 28 14454791024 35.465202 46.34876515635648 false Random W NQFGR 2023-12-04 2023-11-24T05:02:58 1428.02 +42 192 28 14454791024 35.4652 46.34876515635648 false Random W NQFGR 2023-12-04 2023-11-24T05:02:58 1428.02 42 355 72 11536856285 74.42886 53.49032479461299 false Random I IQZEI 2023-12-10 2023-12-06T07:17:58 1098.14 43 828 24 12011396947 45.07647 54.2136449479346 true Random E HIDUO 2023-12-02 2023-12-19T01:14:58 233.10 -45 455 25 12639246000 47.011307 26.310712594958694 false Random Z GGEUA 2023-11-27 2023-12-01T20:41:58 1698.21 +45 455 25 12639246000 47.01131 26.31071259495869 false Random Z GGEUA 2023-11-27 2023-12-01T20:41:58 1698.21 47 838 73 14910230294 83.69784 82.28901816600579 true Random L SHXYL 2023-11-24 2023-12-05T22:19:58 1062.15 48 898 59 12871187130 10.13838 70.19705104611333 true Random J WFXNN 2023-12-23 2023-12-17T02:53:58 1050.21 -5 823 63 13328808917 77.768196 22.87975226738422 false Random F OIYPV 2023-12-11 2023-12-14T06:43:58 1144.38 +5 823 63 13328808917 77.7682 22.87975226738422 false Random F OIYPV 2023-12-11 2023-12-14T06:43:58 1144.38 51 778 59 13914307584 27.48499 91.47665081887983 true Random X FGFHK 2023-12-01 2023-12-10T03:24:58 402.63 -51 898 32 13510411411 18.679659 21.406761033351007 false Random L FECUW 2023-12-10 2023-12-14T02:00:58 700.43 -52 811 31 14085958816 51.067017 65.01991893789116 true Random A CODYQ 2023-12-03 2023-12-07T23:25:58 1797.21 +51 898 32 13510411411 18.67966 21.40676103335101 false Random L FECUW 2023-12-10 2023-12-14T02:00:58 700.43 +52 811 31 14085958816 51.06702 65.01991893789116 true Random A CODYQ 2023-12-03 2023-12-07T23:25:58 1797.21 53 715 29 10917905565 41.83069 93.50885201221966 true Random U TRLSY 2023-12-03 2023-11-26T15:13:58 369.72 -54 467 42 13684826428 38.491455 90.10566649802195 true Random M ERFBG 2023-11-24 2023-12-02T16:23:58 211.00 +54 467 42 13684826428 38.49146 90.10566649802195 true Random M ERFBG 2023-11-24 2023-12-02T16:23:58 211.00 55 908 24 13623721787 40.06427 90.85281792731746 false Random B KFZGI 2023-11-27 2023-12-23T18:06:58 1124.95 -55 964 8 14038541765 70.24135 20.034551391620194 false Random J AYXIT 2023-12-13 2023-12-16T19:38:58 1476.73 -57 936 26 12164628867 56.541275 56.276679149397076 true Random O IPHPZ 2023-12-13 2023-11-30T22:36:58 603.68 -60 875 42 14283877167 48.811504 67.0706975606688 true Random P VJOZH 2023-12-06 2023-12-15T05:20:58 781.71 -61 267 61 11407448558 12.877184 42.144845857251944 true Random B NRWNW 2023-11-30 2023-11-25T09:34:58 859.85 -61 414 63 14506877706 12.540966 58.04557426323987 false Random H NUOAD 2023-12-10 2023-12-06T22:52:58 780.50 -62 451 50 12304139502 51.151623 22.46754141558852 false Random C SRRSV 2023-12-08 2023-12-20T02:48:58 1352.65 +55 964 8 14038541765 70.24135 20.03455139162019 false Random J AYXIT 2023-12-13 2023-12-16T19:38:58 1476.73 +57 936 26 12164628867 56.54128 56.27667914939708 true Random O IPHPZ 2023-12-13 2023-11-30T22:36:58 603.68 +60 875 42 14283877167 48.8115 67.0706975606688 true Random P VJOZH 2023-12-06 2023-12-15T05:20:58 781.71 +61 267 61 11407448558 12.87718 42.14484585725194 true Random B NRWNW 2023-11-30 2023-11-25T09:34:58 859.85 +61 414 63 14506877706 12.54097 58.04557426323987 false Random H NUOAD 2023-12-10 2023-12-06T22:52:58 780.50 +62 451 50 12304139502 51.15162 22.46754141558852 false Random C SRRSV 2023-12-08 2023-12-20T02:48:58 1352.65 63 112 75 12197306353 85.90137 43.48931389222043 false Random C KKAIT 2023-11-27 2023-12-23T04:23:58 1954.90 64 678 14 13681447851 74.83621 36.94143092647816 true Random J KELFB 2023-12-01 2023-12-07T18:14:58 308.26 -66 306 5 14448160602 44.642223 50.24249889525751 false Random X OASEB 2023-12-11 2023-11-27T00:16:58 1345.69 -67 484 65 10817432713 62.168163 77.02869166077757 true Random K SAJMG 2023-12-19 2023-12-14T19:47:58 488.01 -68 922 13 11664232196 72.683266 37.9910331525765 false Random W PPWBB 2023-11-26 2023-12-10T22:54:58 1968.89 -76 504 70 14161652666 58.071503 67.99111956708262 true Random Y HAVCK 2023-11-27 2023-12-14T16:08:58 1864.98 -77 165 36 12887722637 19.729382 45.61157603163882 true Random S OZOLB 2023-12-02 2023-12-03T05:07:58 1576.79 +66 306 5 14448160602 44.64222 50.24249889525751 false Random X OASEB 2023-12-11 2023-11-27T00:16:58 1345.69 +67 484 65 10817432713 62.16816 77.02869166077757 true Random K SAJMG 2023-12-19 2023-12-14T19:47:58 488.01 +68 922 13 11664232196 72.68327 37.9910331525765 false Random W PPWBB 2023-11-26 2023-12-10T22:54:58 1968.89 +76 504 70 14161652666 58.0715 67.99111956708262 true Random Y HAVCK 2023-11-27 2023-12-14T16:08:58 1864.98 +77 165 36 12887722637 19.72938 45.61157603163882 true Random S OZOLB 2023-12-02 2023-12-03T05:07:58 1576.79 8 550 48 13655992126 52.90345 51.35114230137935 false Random X JTVSE 2023-12-13 2023-12-15T03:49:58 361.55 8 866 37 13672147880 81.28999 67.66548594336737 false Random H QDJIM 2023-12-14 2023-12-17T18:44:58 1112.05 -80 815 19 14529289205 19.769405 37.37008094684765 true Random Z WLALH 2023-12-11 2023-12-14T03:24:58 479.38 -86 398 27 13222936963 20.387327 44.51255195842424 true Random T ZCRFI 2023-12-21 2023-12-23T12:04:58 1801.53 -86 728 18 13390353484 61.060482 87.44751616093882 false Random J BUCVI 2023-12-07 2023-12-14T23:00:58 1611.17 -86 998 74 11080891106 82.568756 32.0122101203062 true Random K VAAMT 2023-12-23 2023-12-01T10:14:58 1708.39 -88 274 41 14108849690 73.74919 42.625751442467404 true Random X BVRFA 2023-12-01 2023-11-25T14:32:58 515.18 -89 377 22 14340881803 32.61157 82.5503801214006 false Random K ACYZU 2023-12-01 2023-11-27T02:05:58 672.13 -89 964 41 12706120446 69.484116 32.39048200771184 true Random J IIRNY 2023-12-16 2023-11-29T01:54:58 1298.71 +80 815 19 14529289205 19.76941 37.37008094684765 true Random Z WLALH 2023-12-11 2023-12-14T03:24:58 479.38 +86 398 27 13222936963 20.38733 44.51255195842424 true Random T ZCRFI 2023-12-21 2023-12-23T12:04:58 1801.53 +86 728 18 13390353484 61.06048 87.44751616093882 false Random J BUCVI 2023-12-07 2023-12-14T23:00:58 1611.17 +86 998 74 11080891106 82.56876 32.0122101203062 true Random K VAAMT 2023-12-23 2023-12-01T10:14:58 1708.39 +88 274 41 14108849690 73.74919 42.6257514424674 true Random X BVRFA 2023-12-01 2023-11-25T14:32:58 515.18 +89 377 22 14340881803 32.61157 82.55038012140059 false Random K ACYZU 2023-12-01 2023-11-27T02:05:58 672.13 +89 964 41 12706120446 69.48412 32.39048200771184 true Random J IIRNY 2023-12-16 2023-11-29T01:54:58 1298.71 90 391 26 12874761259 21.49042 53.46850617467312 true Random Q QTJPE 2023-12-17 2023-12-03T17:40:58 748.05 -91 389 11 14784237986 11.174142 27.692284427565397 true Random P DYILB 2023-12-14 2023-12-21T11:07:58 1175.73 +91 389 11 14784237986 11.17414 27.6922844275654 true Random P DYILB 2023-12-14 2023-12-21T11:07:58 1175.73 91 528 68 14588592231 77.4651 88.92064181463138 false Random U JXZUA 2023-12-16 2023-12-21T02:28:58 1834.07 -93 887 20 13555948969 70.57364 32.621532934876804 false Random D SPMEK 2023-11-26 2023-12-20T18:11:58 258.86 -96 595 72 11506136303 21.917727 74.74561804277158 true Random T SPLKA 2023-12-02 2023-11-30T00:39:58 1693.61 +93 887 20 13555948969 70.57364 32.6215329348768 false Random D SPMEK 2023-11-26 2023-12-20T18:11:58 258.86 +96 595 72 11506136303 21.91773 74.74561804277158 true Random T SPLKA 2023-12-02 2023-11-30T00:39:58 1693.61 97 839 60 14818779777 46.17389 68.98285340004992 false Random W HMFPU 2023-12-01 2023-12-04T08:41:58 1683.48 -- !q46 -- -1 578 55 2111222273 56.858597 82.38111658179561 true Random C LYDUG 2023-12-17 2023-12-05T13:04:58 1393.11 -29 910 52 5544039917 22.179396 46.32732226806482 true Random C TIZAG 2023-11-28 2023-12-14T16:08:58 900.96 -3 300 15 3000000000 20.25 30.0 true Third C Gamma 2023-10-08 2023-10-08T16:15 345.67 -43 178 64 6969956763 40.980415 52.998828731408516 true Random C XQHYB 2023-12-11 2023-12-07T23:00:58 257.08 -69 416 14 7702410607 31.638903 89.5793904314531 true Random C URQMU 2023-11-25 2023-11-30T15:17:58 1379.22 +1 578 55 2111222273 56.8586 82.38111658179561 true Random C LYDUG 2023-12-17 2023-12-05T13:04:58 1393.11 +29 910 52 5544039917 22.1794 46.32732226806482 true Random C TIZAG 2023-11-28 2023-12-14T16:08:58 900.96 +3 300 15 3000000000 20.25 30 true Third C Gamma 2023-10-08 2023-10-08T16:15 345.67 +43 178 64 6969956763 40.98042 52.99882873140852 true Random C XQHYB 2023-12-11 2023-12-07T23:00:58 257.08 +69 416 14 7702410607 31.6389 89.57939043145311 true Random C URQMU 2023-11-25 2023-11-30T15:17:58 1379.22 82 107 51 1358006007 78.36581 46.09413324325159 true Random C IPNQU 2023-12-01 2023-12-14T05:41:58 417.17 -- !q47 -- -1 578 55 2111222273 56.858597 82.38111658179561 true Random C LYDUG 2023-12-17 2023-12-05T13:04:58 1393.11 -1 979 44 10163954251 28.827957 57.56879940298416 true Random Q DNRGE 2023-12-09 2023-12-10T20:21:58 1581.25 +1 578 55 2111222273 56.8586 82.38111658179561 true Random C LYDUG 2023-12-17 2023-12-05T13:04:58 1393.11 +1 979 44 10163954251 28.82796 57.56879940298416 true Random Q DNRGE 2023-12-09 2023-12-10T20:21:58 1581.25 10 1000 50 10000000000 55.25 65.75 false Tenth J Kappa 2023-10-15 2023-10-15T23:30 1012.34 -10 966 38 2203748112 45.555325 27.908447208440094 true Random W LFAGO 2023-12-14 2023-11-26T20:00:58 1898.68 -11 1100 55 11000000000 60.5 70.0 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 -11 441 19 7370044350 74.261696 62.013817404758086 true Random D UYKZA 2023-12-23 2023-12-15T11:49:58 1805.14 -11 487 27 14556302216 85.33334 62.596750833474495 true Random E QMHJD 2023-12-23 2023-12-24T08:30:58 1491.22 +10 966 38 2203748112 45.55532 27.90844720844009 true Random W LFAGO 2023-12-14 2023-11-26T20:00:58 1898.68 +11 1100 55 11000000000 60.5 70 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 +11 441 19 7370044350 74.2617 62.01381740475809 true Random D UYKZA 2023-12-23 2023-12-15T11:49:58 1805.14 +11 487 27 14556302216 85.33334 62.59675083347449 true Random E QMHJD 2023-12-23 2023-12-24T08:30:58 1491.22 12 1200 60 12000000000 65.75 75.25 false Twelfth L Mu 2023-10-17 2023-10-17T02:15 1234.56 13 1300 65 13000000000 70.0 80.5 true Thirteenth M Nu 2023-10-18 2023-10-18T03:30 1345.67 13 402 30 10851194313 74.82481 74.90108005771035 false Random F GEMMK 2023-11-27 2023-12-21T15:03:58 1643.55 -13 696 74 3370487489 84.544014 88.69976219408227 true Random H RTFJI 2023-11-23 2023-11-25T07:32:58 1761.50 -13 745 48 13047949175 51.168613 85.21972389262197 true Random A AYBWQ 2023-12-22 2023-12-22T16:25:58 1192.48 -13 859 65 7433576046 56.136265 34.87823331022725 false Random L CRFUF 2023-12-23 2023-12-12T15:05:58 1037.15 +13 696 74 3370487489 84.54401 88.69976219408227 true Random H RTFJI 2023-11-23 2023-11-25T07:32:58 1761.50 +13 745 48 13047949175 51.16861 85.21972389262197 true Random A AYBWQ 2023-12-22 2023-12-22T16:25:58 1192.48 +13 859 65 7433576046 56.13626 34.87823331022725 false Random L CRFUF 2023-12-23 2023-12-12T15:05:58 1037.15 14 1400 70 14000000000 75.25 85.75 false Fourteenth N Xi 2023-10-19 2023-10-19T04:45 1456.78 -14 195 17 2370700139 16.777058 64.81793301410002 false Random P IIGRE 2023-12-12 2023-12-14T22:40:58 1678.44 -14 966 65 7828602539 62.430664 68.85873133439297 true Random I VVOQH 2023-12-01 2023-12-06T00:54:58 1300.43 -15 1500 75 15000000000 80.5 90.0 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 -16 135 22 7901304568 43.944805 85.16901944253635 true Random K NUQEP 2023-11-29 2023-11-25T23:42:58 1440.74 +14 195 17 2370700139 16.77706 64.81793301410002 false Random P IIGRE 2023-12-12 2023-12-14T22:40:58 1678.44 +14 966 65 7828602539 62.43066 68.85873133439297 true Random I VVOQH 2023-12-01 2023-12-06T00:54:58 1300.43 +15 1500 75 15000000000 80.5 90 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 +16 135 22 7901304568 43.94481 85.16901944253635 true Random K NUQEP 2023-11-29 2023-11-25T23:42:58 1440.74 16 615 20 12294128025 77.37379 20.42772029677839 true Random U JHPOB 2023-11-30 2023-12-16T14:29:58 1105.33 -17 499 46 11230409207 51.632103 28.811164197154774 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 -18 690 17 1399456103 63.261967 42.964715823771236 true Random R BWSRS 2023-12-13 2023-12-23T08:33:58 1840.02 -18 835 17 14265814864 18.923101 80.53531451138412 true Random V PIKUZ 2023-12-20 2023-12-21T07:39:58 1167.09 -19 917 66 2340946367 89.035675 22.649362455875274 false Random D HWHMU 2023-11-30 2023-12-10T02:36:58 1960.07 -19 993 13 7039833438 79.769066 69.79049291517285 true Random X OFSUV 2023-12-11 2023-12-08T01:46:58 1958.95 +17 499 46 11230409207 51.6321 28.81116419715477 false Random V BVLUH 2023-12-13 2023-12-23T17:59:58 1387.62 +18 690 17 1399456103 63.26197 42.96471582377124 true Random R BWSRS 2023-12-13 2023-12-23T08:33:58 1840.02 +18 835 17 14265814864 18.9231 80.53531451138412 true Random V PIKUZ 2023-12-20 2023-12-21T07:39:58 1167.09 +19 917 66 2340946367 89.03568 22.64936245587527 false Random D HWHMU 2023-11-30 2023-12-10T02:36:58 1960.07 +19 993 13 7039833438 79.76907 69.79049291517285 true Random X OFSUV 2023-12-11 2023-12-08T01:46:58 1958.95 2 921 62 8557914543 78.52379 58.6849882881372 false Random D KBXXS 2023-12-07 2023-12-02T22:24:58 1782.88 -2 925 46 6013180177 41.107002 34.86561026061906 true Random L XLLXY 2023-12-06 2023-12-09T14:04:58 1246.26 -20 248 64 7704906572 35.089928 76.69128821479936 true Random T KQOMS 2023-11-30 2023-12-11T01:35:58 1799.26 -22 200 41 12163439252 64.621254 81.68574929661384 true Random U KGVNU 2023-12-20 2023-11-30T14:56:58 1915.47 -23 192 8 5102667616 54.111057 40.85713971600841 false Random J EBXEB 2023-12-13 2023-12-10T11:32:58 1824.12 +2 925 46 6013180177 41.107 34.86561026061906 true Random L XLLXY 2023-12-06 2023-12-09T14:04:58 1246.26 +20 248 64 7704906572 35.08993 76.69128821479936 true Random T KQOMS 2023-11-30 2023-12-11T01:35:58 1799.26 +22 200 41 12163439252 64.62125 81.68574929661384 true Random U KGVNU 2023-12-20 2023-11-30T14:56:58 1915.47 +23 192 8 5102667616 54.11106 40.85713971600841 false Random J EBXEB 2023-12-13 2023-12-10T11:32:58 1824.12 27 866 24 5531365994 72.77447 86.96690821165853 false Random S TZPFJ 2023-11-28 2023-12-13T15:31:58 1274.75 -29 157 34 2302882987 51.924015 20.311140937696468 true Random R MBOXJ 2023-12-02 2023-12-03T14:12:58 1620.80 -29 923 57 1591814253 68.57371 33.342802789892986 true Random Q ZONGC 2023-12-20 2023-12-13T09:11:58 1465.38 -3 259 74 7422478791 22.291426 75.38227773520089 true Random S VWAXJ 2023-12-01 2023-12-05T21:23:58 1970.57 -30 292 71 10308444223 63.039078 76.40649540444898 false Random G DRLHY 2023-12-19 2023-12-14T15:32:58 1165.14 -30 830 65 12624057029 38.791172 59.72899174862661 false Random A LFPWP 2023-12-03 2023-12-17T00:10:58 1760.62 +29 157 34 2302882987 51.92402 20.31114093769647 true Random R MBOXJ 2023-12-02 2023-12-03T14:12:58 1620.80 +29 923 57 1591814253 68.57371 33.34280278989299 true Random Q ZONGC 2023-12-20 2023-12-13T09:11:58 1465.38 +3 259 74 7422478791 22.29143 75.38227773520089 true Random S VWAXJ 2023-12-01 2023-12-05T21:23:58 1970.57 +30 292 71 10308444223 63.03908 76.40649540444898 false Random G DRLHY 2023-12-19 2023-12-14T15:32:58 1165.14 +30 830 65 12624057029 38.79117 59.72899174862661 false Random A LFPWP 2023-12-03 2023-12-17T00:10:58 1760.62 31 395 22 6141426904 88.37914 52.0655270963123 false Random J DRPJV 2023-12-07 2023-11-29T03:15:58 1076.41 -31 990 5 13678786851 15.762894 85.24173385692956 false Random H THGIM 2023-12-14 2023-12-09T01:24:58 1834.37 -39 726 50 3865644066 26.225628 28.534393094364418 false Random F NIUCS 2023-12-05 2023-12-04T19:31:58 1953.82 +31 990 5 13678786851 15.76289 85.24173385692956 false Random H THGIM 2023-12-14 2023-12-09T01:24:58 1834.37 +39 726 50 3865644066 26.22563 28.53439309436442 false Random F NIUCS 2023-12-05 2023-12-04T19:31:58 1953.82 4 122 24 10738473173 81.15482 60.21481394154484 false Random Y PQJRK 2023-12-20 2023-12-09T02:38:58 1467.35 -4 569 72 10560903405 50.255936 47.535145739285184 false Random O NRIRC 2023-12-05 2023-12-01T09:10:58 1986.99 -40 230 34 10824964541 16.929768 53.812277279703366 false Random F YDQHF 2023-12-14 2023-12-03T17:42:58 1623.79 -40 914 7 4902128502 19.442041 33.099787387344406 true Random Q KOCWA 2023-11-28 2023-12-21T09:20:58 1824.80 -41 344 34 14536795918 56.660946 84.15108995619764 false Random Q KYLCH 2023-12-10 2023-12-04T08:25:58 1902.09 -41 697 21 1200243566 12.466168 68.57243624557165 true Random U JZGEG 2023-12-03 2023-12-10T04:51:58 1323.88 +4 569 72 10560903405 50.25594 47.53514573928518 false Random O NRIRC 2023-12-05 2023-12-01T09:10:58 1986.99 +40 230 34 10824964541 16.92977 53.81227727970337 false Random F YDQHF 2023-12-14 2023-12-03T17:42:58 1623.79 +40 914 7 4902128502 19.44204 33.09978738734441 true Random Q KOCWA 2023-11-28 2023-12-21T09:20:58 1824.80 +41 344 34 14536795918 56.66095 84.15108995619764 false Random Q KYLCH 2023-12-10 2023-12-04T08:25:58 1902.09 +41 697 21 1200243566 12.46617 68.57243624557165 true Random U JZGEG 2023-12-03 2023-12-10T04:51:58 1323.88 41 708 64 11745827370 72.84812 35.31028363777645 true Random O WGSQC 2023-12-02 2023-11-25T17:07:58 1666.71 42 178 38 7559404453 69.69449 64.37154501388798 true Random G QUMUN 2023-12-14 2023-12-17T01:37:58 1190.44 -42 192 28 14454791024 35.465202 46.34876515635648 false Random W NQFGR 2023-12-04 2023-11-24T05:02:58 1428.02 +42 192 28 14454791024 35.4652 46.34876515635648 false Random W NQFGR 2023-12-04 2023-11-24T05:02:58 1428.02 42 355 72 11536856285 74.42886 53.49032479461299 false Random I IQZEI 2023-12-10 2023-12-06T07:17:58 1098.14 44 219 38 8596488294 73.52956 94.10797854680568 true Random E HMWBI 2023-12-15 2023-12-06T00:51:58 1907.47 -44 694 55 3626514138 62.504086 72.89799265418553 true Random Z JTDVF 2023-12-01 2023-11-29T12:08:58 1769.92 -45 455 25 12639246000 47.011307 26.310712594958694 false Random Z GGEUA 2023-11-27 2023-12-01T20:41:58 1698.21 -45 492 43 3870916386 51.069588 42.652270406300794 true Random H JVZTB 2023-12-04 2023-12-09T21:06:58 1517.83 -47 508 48 1456473942 48.488297 20.377955902326608 false Random B CAOEY 2023-11-29 2023-12-10T14:49:58 1865.52 -47 566 50 1426586688 51.278687 40.47151456873397 true Random F YBOSH 2023-11-26 2023-12-15T03:44:58 1806.35 +44 694 55 3626514138 62.50409 72.89799265418553 true Random Z JTDVF 2023-12-01 2023-11-29T12:08:58 1769.92 +45 455 25 12639246000 47.01131 26.31071259495869 false Random Z GGEUA 2023-11-27 2023-12-01T20:41:58 1698.21 +45 492 43 3870916386 51.06959 42.65227040630079 true Random H JVZTB 2023-12-04 2023-12-09T21:06:58 1517.83 +47 508 48 1456473942 48.4883 20.37795590232661 false Random B CAOEY 2023-11-29 2023-12-10T14:49:58 1865.52 +47 566 50 1426586688 51.27869 40.47151456873397 true Random F YBOSH 2023-11-26 2023-12-15T03:44:58 1806.35 47 838 73 14910230294 83.69784 82.28901816600579 true Random L SHXYL 2023-11-24 2023-12-05T22:19:58 1062.15 48 898 59 12871187130 10.13838 70.19705104611333 true Random J WFXNN 2023-12-23 2023-12-17T02:53:58 1050.21 -49 412 16 8300982793 56.263252 66.07893608061771 false Random K DWWJI 2023-12-08 2023-12-17T11:32:58 1718.54 -49 568 70 2916596630 79.16303 56.114316916863025 false Random T ILLIU 2023-11-23 2023-12-07T11:05:58 1039.03 -5 768 5 4152322228 41.128906 78.60686390712706 false Random J LXKRA 2023-12-05 2023-11-24T18:13:58 1941.98 -5 823 63 13328808917 77.768196 22.87975226738422 false Random F OIYPV 2023-12-11 2023-12-14T06:43:58 1144.38 -52 811 31 14085958816 51.067017 65.01991893789116 true Random A CODYQ 2023-12-03 2023-12-07T23:25:58 1797.21 -54 827 55 7054839267 58.555687 25.891004802115663 false Random O ASMLW 2023-12-13 2023-12-20T16:41:58 1369.32 -54 843 34 9547939940 38.66475 36.370944299232434 true Random P NTVIR 2023-12-12 2023-12-02T06:45:58 1628.37 +49 412 16 8300982793 56.26325 66.07893608061771 false Random K DWWJI 2023-12-08 2023-12-17T11:32:58 1718.54 +49 568 70 2916596630 79.16303 56.11431691686303 false Random T ILLIU 2023-11-23 2023-12-07T11:05:58 1039.03 +5 768 5 4152322228 41.12891 78.60686390712706 false Random J LXKRA 2023-12-05 2023-11-24T18:13:58 1941.98 +5 823 63 13328808917 77.7682 22.87975226738422 false Random F OIYPV 2023-12-11 2023-12-14T06:43:58 1144.38 +52 811 31 14085958816 51.06702 65.01991893789116 true Random A CODYQ 2023-12-03 2023-12-07T23:25:58 1797.21 +54 827 55 7054839267 58.55569 25.89100480211566 false Random O ASMLW 2023-12-13 2023-12-20T16:41:58 1369.32 +54 843 34 9547939940 38.66475 36.37094429923243 true Random P NTVIR 2023-12-12 2023-12-02T06:45:58 1628.37 55 908 24 13623721787 40.06427 90.85281792731746 false Random B KFZGI 2023-11-27 2023-12-23T18:06:58 1124.95 -55 964 8 14038541765 70.24135 20.034551391620194 false Random J AYXIT 2023-12-13 2023-12-16T19:38:58 1476.73 -59 144 31 6208909394 67.417076 40.59765633709834 true Random D FLWNA 2023-12-12 2023-12-19T06:17:58 1870.24 +55 964 8 14038541765 70.24135 20.03455139162019 false Random J AYXIT 2023-12-13 2023-12-16T19:38:58 1476.73 +59 144 31 6208909394 67.41708 40.59765633709834 true Random D FLWNA 2023-12-12 2023-12-19T06:17:58 1870.24 59 509 50 5501336408 39.94401 73.35770882761237 true Random I PVZNO 2023-12-04 2023-11-27T04:40:58 1177.33 -60 711 69 1493870104 22.574188 61.30347648465907 false Random E FHKVR 2023-11-27 2023-12-05T11:26:58 1981.61 -62 451 50 12304139502 51.151623 22.46754141558852 false Random C SRRSV 2023-12-08 2023-12-20T02:48:58 1352.65 +60 711 69 1493870104 22.57419 61.30347648465907 false Random E FHKVR 2023-11-27 2023-12-05T11:26:58 1981.61 +62 451 50 12304139502 51.15162 22.46754141558852 false Random C SRRSV 2023-12-08 2023-12-20T02:48:58 1352.65 63 112 75 12197306353 85.90137 43.48931389222043 false Random C KKAIT 2023-11-27 2023-12-23T04:23:58 1954.90 -63 383 35 5161212745 39.455276 52.33267523851794 false Random X TMYMC 2023-11-29 2023-12-10T09:09:58 1442.54 -63 410 33 1767102777 72.260124 56.971483381024896 false Random B QXNSM 2023-12-12 2023-12-19T22:57:58 1660.73 -64 719 36 1224510454 64.237434 86.05689694804887 true Random E ZVQPU 2023-11-30 2023-12-03T04:56:58 1879.25 -66 306 5 14448160602 44.642223 50.24249889525751 false Random X OASEB 2023-12-11 2023-11-27T00:16:58 1345.69 -68 266 31 8183454755 69.19586 23.139304803938643 false Random S STCBM 2023-11-26 2023-12-22T13:42:58 1722.37 +63 383 35 5161212745 39.45528 52.33267523851794 false Random X TMYMC 2023-11-29 2023-12-10T09:09:58 1442.54 +63 410 33 1767102777 72.26012 56.9714833810249 false Random B QXNSM 2023-12-12 2023-12-19T22:57:58 1660.73 +64 719 36 1224510454 64.23743 86.05689694804887 true Random E ZVQPU 2023-11-30 2023-12-03T04:56:58 1879.25 +66 306 5 14448160602 44.64222 50.24249889525751 false Random X OASEB 2023-12-11 2023-11-27T00:16:58 1345.69 +68 266 31 8183454755 69.19586 23.13930480393864 false Random S STCBM 2023-11-26 2023-12-22T13:42:58 1722.37 68 756 63 5416393421 66.41538 76.32820339134415 false Random Y CUNAL 2023-12-23 2023-12-14T22:49:58 1109.25 -68 922 13 11664232196 72.683266 37.9910331525765 false Random W PPWBB 2023-11-26 2023-12-10T22:54:58 1968.89 -69 416 14 7702410607 31.638903 89.5793904314531 true Random C URQMU 2023-11-25 2023-11-30T15:17:58 1379.22 +68 922 13 11664232196 72.68327 37.9910331525765 false Random W PPWBB 2023-11-26 2023-12-10T22:54:58 1968.89 +69 416 14 7702410607 31.6389 89.57939043145311 true Random C URQMU 2023-11-25 2023-11-30T15:17:58 1379.22 7 969 62 3451343234 57.17074 56.74513811095188 false Random G OWDSC 2023-12-19 2023-12-11T17:17:58 1874.22 -70 231 67 4547989149 35.103123 51.93622592177748 true Random V ZBCVY 2023-11-29 2023-12-22T11:41:58 1749.60 -70 421 23 3153379289 27.412096 79.32006404438445 false Random L VLJWK 2023-12-04 2023-12-12T05:31:58 1163.35 +70 231 67 4547989149 35.10312 51.93622592177748 true Random V ZBCVY 2023-11-29 2023-12-22T11:41:58 1749.60 +70 421 23 3153379289 27.4121 79.32006404438445 false Random L VLJWK 2023-12-04 2023-12-12T05:31:58 1163.35 70 751 56 7828222634 52.8313 55.7263634552559 true Random B TFHMH 2023-11-30 2023-12-24T12:22:58 1166.13 -73 866 49 4618070115 46.803646 91.41305051885227 true Random H ROYYF 2023-12-07 2023-12-01T10:28:58 1817.67 -76 504 70 14161652666 58.071503 67.99111956708262 true Random Y HAVCK 2023-11-27 2023-12-14T16:08:58 1864.98 +73 866 49 4618070115 46.80365 91.41305051885227 true Random H ROYYF 2023-12-07 2023-12-01T10:28:58 1817.67 +76 504 70 14161652666 58.0715 67.99111956708262 true Random Y HAVCK 2023-11-27 2023-12-14T16:08:58 1864.98 77 131 19 2964167114 33.23181 53.35246738882714 false Random G AHGFO 2023-12-19 2023-12-01T10:11:58 1837.90 -77 165 36 12887722637 19.729382 45.61157603163882 true Random S OZOLB 2023-12-02 2023-12-03T05:07:58 1576.79 +77 165 36 12887722637 19.72938 45.61157603163882 true Random S OZOLB 2023-12-02 2023-12-03T05:07:58 1576.79 8 866 37 13672147880 81.28999 67.66548594336737 false Random H QDJIM 2023-12-14 2023-12-17T18:44:58 1112.05 -80 267 57 8797946135 35.604717 80.51381110359165 false Random K KQTEX 2023-12-09 2023-12-13T06:19:58 1769.15 +80 267 57 8797946135 35.60472 80.51381110359165 false Random K KQTEX 2023-12-09 2023-12-13T06:19:58 1769.15 82 603 60 9083469993 81.24088 44.46228092092543 true Random Y WTQGU 2023-11-30 2023-11-28T13:18:58 1448.45 -84 427 60 9035762847 81.971306 28.37315065501099 true Random L FETYF 2023-12-01 2023-11-24T15:00:58 1267.12 +84 427 60 9035762847 81.97131 28.37315065501099 true Random L FETYF 2023-12-01 2023-11-24T15:00:58 1267.12 85 375 63 6797318130 85.47522 58.16330728665678 true Random E UNZLS 2023-12-01 2023-12-04T05:17:58 1949.48 -85 873 18 7233488476 33.83051 31.655950581225508 false Random N RJTIB 2023-11-23 2023-12-11T15:07:58 1249.52 -86 398 27 13222936963 20.387327 44.51255195842424 true Random T ZCRFI 2023-12-21 2023-12-23T12:04:58 1801.53 +85 873 18 7233488476 33.83051 31.65595058122551 false Random N RJTIB 2023-11-23 2023-12-11T15:07:58 1249.52 +86 398 27 13222936963 20.38733 44.51255195842424 true Random T ZCRFI 2023-12-21 2023-12-23T12:04:58 1801.53 86 662 53 8875065706 28.64778 30.6775849729486 false Random N YNQAY 2023-12-15 2023-11-24T21:56:58 1108.35 -86 728 18 13390353484 61.060482 87.44751616093882 false Random J BUCVI 2023-12-07 2023-12-14T23:00:58 1611.17 -86 998 74 11080891106 82.568756 32.0122101203062 true Random K VAAMT 2023-12-23 2023-12-01T10:14:58 1708.39 +86 728 18 13390353484 61.06048 87.44751616093882 false Random J BUCVI 2023-12-07 2023-12-14T23:00:58 1611.17 +86 998 74 11080891106 82.56876 32.0122101203062 true Random K VAAMT 2023-12-23 2023-12-01T10:14:58 1708.39 87 145 64 9022533179 37.80205 63.26081178595084 true Random T PEOPK 2023-12-08 2023-12-07T17:41:58 1167.05 -87 641 64 4786767059 14.765089 70.8793353664754 false Random W SQHGN 2023-12-12 2023-12-24T01:19:58 1316.61 -88 728 59 8439434199 30.372904 59.410283344764366 false Random F JODWY 2023-12-04 2023-12-01T07:57:58 1753.88 +87 641 64 4786767059 14.76509 70.8793353664754 false Random W SQHGN 2023-12-12 2023-12-24T01:19:58 1316.61 +88 728 59 8439434199 30.3729 59.41028334476437 false Random F JODWY 2023-12-04 2023-12-01T07:57:58 1753.88 88 765 69 9753682777 83.42646 25.99260711248508 true Random M MEJAX 2023-11-25 2023-12-20T09:21:58 1647.22 -89 129 64 6400162051 67.910965 80.48074661432221 true Random Y ZXJWQ 2023-12-16 2023-12-19T10:23:58 1882.65 -89 964 41 12706120446 69.484116 32.39048200771184 true Random J IIRNY 2023-12-16 2023-11-29T01:54:58 1298.71 -9 113 7 6162580854 11.346889 46.82839094332704 false Random A SJTAF 2023-12-14 2023-11-23T18:27:58 1610.49 -91 389 11 14784237986 11.174142 27.692284427565397 true Random P DYILB 2023-12-14 2023-12-21T11:07:58 1175.73 +89 129 64 6400162051 67.91096 80.48074661432221 true Random Y ZXJWQ 2023-12-16 2023-12-19T10:23:58 1882.65 +89 964 41 12706120446 69.48412 32.39048200771184 true Random J IIRNY 2023-12-16 2023-11-29T01:54:58 1298.71 +9 113 7 6162580854 11.34689 46.82839094332704 false Random A SJTAF 2023-12-14 2023-11-23T18:27:58 1610.49 +91 389 11 14784237986 11.17414 27.6922844275654 true Random P DYILB 2023-12-14 2023-12-21T11:07:58 1175.73 91 528 68 14588592231 77.4651 88.92064181463138 false Random U JXZUA 2023-12-16 2023-12-21T02:28:58 1834.07 -92 344 29 5182139341 31.653255 44.26814517218887 true Random F NGHOS 2023-12-06 2023-12-09T21:25:58 1291.06 -94 216 49 8773264156 81.617195 43.03983700523827 true Random D VHWYT 2023-12-13 2023-11-30T07:03:58 1178.27 -94 693 60 4818659234 26.04229 83.2975107272106 true Random B ENSQO 2023-12-22 2023-12-12T06:08:58 1283.81 -96 595 72 11506136303 21.917727 74.74561804277158 true Random T SPLKA 2023-12-02 2023-11-30T00:39:58 1693.61 -96 637 39 5516035994 55.90832 60.522041012562816 true Random O YPETL 2023-12-02 2023-11-28T02:47:58 1175.16 -97 415 74 10346322649 21.667427 46.58901867647463 false Random R KWFOF 2023-12-21 2023-11-27T12:18:58 1157.72 +92 344 29 5182139341 31.65326 44.26814517218887 true Random F NGHOS 2023-12-06 2023-12-09T21:25:58 1291.06 +94 216 49 8773264156 81.6172 43.03983700523827 true Random D VHWYT 2023-12-13 2023-11-30T07:03:58 1178.27 +94 693 60 4818659234 26.04229 83.29751072721059 true Random B ENSQO 2023-12-22 2023-12-12T06:08:58 1283.81 +96 595 72 11506136303 21.91773 74.74561804277158 true Random T SPLKA 2023-12-02 2023-11-30T00:39:58 1693.61 +96 637 39 5516035994 55.90832 60.52204101256282 true Random O YPETL 2023-12-02 2023-11-28T02:47:58 1175.16 +97 415 74 10346322649 21.66743 46.58901867647463 false Random R KWFOF 2023-12-21 2023-11-27T12:18:58 1157.72 97 839 60 14818779777 46.17389 68.98285340004992 false Random W HMFPU 2023-12-01 2023-12-04T08:41:58 1683.48 -- !q48 -- 1 100 5 1000000000 10.5 20.75 true First A Alpha 2023-10-06 2023-10-06T14:30 123.45 10 1000 50 10000000000 55.25 65.75 false Tenth J Kappa 2023-10-15 2023-10-15T23:30 1012.34 -11 1100 55 11000000000 60.5 70.0 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 +11 1100 55 11000000000 60.5 70 true Eleventh K Lambda 2023-10-16 2023-10-16T01:45 1123.45 12 1200 60 12000000000 65.75 75.25 false Twelfth L Mu 2023-10-17 2023-10-17T02:15 1234.56 13 1300 65 13000000000 70.0 80.5 true Thirteenth M Nu 2023-10-18 2023-10-18T03:30 1345.67 14 1400 70 14000000000 75.25 85.75 false Fourteenth N Xi 2023-10-19 2023-10-19T04:45 1456.78 -15 1500 75 15000000000 80.5 90.0 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 +15 1500 75 15000000000 80.5 90 true Fifteenth O Omicron 2023-10-20 2023-10-20T05:15 1567.89 2 200 10 2000000000 15.75 25.5 false Second B Beta 2023-10-07 2023-10-07T15:45 234.56 -3 300 15 3000000000 20.25 30.0 true Third C Gamma 2023-10-08 2023-10-08T16:15 345.67 +3 300 15 3000000000 20.25 30 true Third C Gamma 2023-10-08 2023-10-08T16:15 345.67 4 400 20 4000000000 25.5 35.25 false Fourth D Delta 2023-10-09 2023-10-09T17:30 456.78 5 500 25 5000000000 30.75 40.5 true Fifth E Epsilon 2023-10-10 2023-10-10T18:45 567.89 6 600 30 6000000000 35.25 45.75 false Sixth F Zeta 2023-10-11 2023-10-11T19:15 678.90 -7 700 35 7000000000 40.5 50.0 true Seventh G Eta 2023-10-12 2023-10-12T20:30 789.01 +7 700 35 7000000000 40.5 50 true Seventh G Eta 2023-10-12 2023-10-12T20:30 789.01 8 800 40 8000000000 45.75 55.25 false Eighth H Theta 2023-10-13 2023-10-13T21:45 890.12 9 900 45 9000000000 50.0 60.5 true Ninth I Iota 2023-10-14 2023-10-14T22:15 901.23 @@ -570,4 +524,3 @@ 438 491 21 66065079309 6.6624016E7 1.5542114222539822E10 false CEbvKZRdvMHxzVOIejq wJ eoTkUlht 2023-12-08 2023-12-17T19:49:48 86666.80 -- !lzo_8 -- - diff --git a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out index fe8243f91e0a95..62fabbe7d08be7 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out +++ b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out @@ -650,654 +650,3 @@ true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -- !schema_7 -- \N \N \N \N \N \N \N \N \N test test test 1 2 3 4 5.1 6.2 true false 2011-05-06 2011-05-06T07:08:09.123 -1.2 12.30 -1234.5678 123456789.12340000 -1234567890.12345678 1234567890123456789012.1234567800000000 dGVzdDI= --- !all_types_bool_col_topn_asc -- -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 -false 1 1 1 10 1.1 10.1 11 01/02/09 1 2009-01-02T07:11:00.450 2009 1 -false 3 3 3 30 3.3 30.3 13 01/02/09 3 2009-01-02T07:13:00.480 2009 1 -false 5 5 5 50 5.5 50.5 15 01/02/09 5 2009-01-02T07:15:00.550 2009 1 -false 7 7 7 70 7.7 70.7 17 01/02/09 7 2009-01-02T07:17:00.660 2009 1 -false 9 9 9 90 9.9 90.89999999999999 19 01/02/09 9 2009-01-02T07:19:00.810 2009 1 - --- !all_types_bool_col_topn_desc -- -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 8 8 8 80 8.8 80.8 7288 12/30/10 8 2010-12-30T11:58:13.330 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7286 12/30/10 6 2010-12-30T11:56:13.200 2010 12 -true 4 4 4 40 4.4 40.4 7284 12/30/10 4 2010-12-30T11:54:13.110 2010 12 -true 2 2 2 20 2.2 20.2 7282 12/30/10 2 2010-12-30T11:52:13.600 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 - --- !all_types_tinyint_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_tinyint_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_smallint_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_smallint_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_int_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_int_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_bigint_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_bigint_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_float_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_float_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_double_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_double_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_id_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_id_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 - --- !all_types_date_string_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_date_string_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 - --- !all_types_string_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_string_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7279 12/29/10 9 2010-12-29T11:49:12.960 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7269 12/28/10 9 2010-12-28T11:39:12.510 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7259 12/27/10 9 2010-12-27T11:29:12.600 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7249 12/26/10 9 2010-12-26T11:19:11.610 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7239 12/25/10 9 2010-12-25T11:09:11.160 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7229 12/24/10 9 2010-12-24T10:59:10.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7219 12/23/10 9 2010-12-23T10:49:10.260 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7209 12/22/10 9 2010-12-22T10:39:09.810 2010 12 - --- !all_types_timestamp_col_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_timestamp_col_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 - --- !all_types_year_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_year_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 - --- !all_types_month_topn_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_month_topn_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 - --- !all_types_bool_col_topn_abs_asc -- -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 -false 1 1 1 10 1.1 10.1 11 01/02/09 1 2009-01-02T07:11:00.450 2009 1 -false 3 3 3 30 3.3 30.3 13 01/02/09 3 2009-01-02T07:13:00.480 2009 1 -false 5 5 5 50 5.5 50.5 15 01/02/09 5 2009-01-02T07:15:00.550 2009 1 -false 7 7 7 70 7.7 70.7 17 01/02/09 7 2009-01-02T07:17:00.660 2009 1 -false 9 9 9 90 9.9 90.89999999999999 19 01/02/09 9 2009-01-02T07:19:00.810 2009 1 - --- !all_types_bool_col_topn_abs_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7289 12/30/10 9 2010-12-30T11:59:13.410 2010 12 -false 7 7 7 70 7.7 70.7 7287 12/30/10 7 2010-12-30T11:57:13.260 2010 12 -false 5 5 5 50 5.5 50.5 7285 12/30/10 5 2010-12-30T11:55:13.150 2010 12 -false 3 3 3 30 3.3 30.3 7283 12/30/10 3 2010-12-30T11:53:13.800 2010 12 -false 1 1 1 10 1.1 10.1 7281 12/30/10 1 2010-12-30T11:51:13.500 2010 12 - --- !all_types_tinyint_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_tinyint_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_smallint_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_smallint_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_int_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_int_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_bigint_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_bigint_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_float_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_float_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_double_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_double_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_id_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_id_topn_abs_desc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_date_string_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_date_string_col_topn_abs_desc -- -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -true 4 4 4 40 4.4 40.4 7294 12/31/10 4 2010-12-31T12:04:13.560 2010 12 -false 3 3 3 30 3.3 30.3 7293 12/31/10 3 2010-12-31T12:03:13.530 2010 12 -true 2 2 2 20 2.2 20.2 7292 12/31/10 2 2010-12-31T12:02:13.510 2010 12 -false 1 1 1 10 1.1 10.1 7291 12/31/10 1 2010-12-31T12:01:13.500 2010 12 -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 - --- !all_types_string_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -true 0 0 0 0 0.0 0 10 01/02/09 0 2009-01-02T07:10:00.450 2009 1 -true 0 0 0 0 0.0 0 20 01/03/09 0 2009-01-03T07:20:00.900 2009 1 -true 0 0 0 0 0.0 0 30 01/04/09 0 2009-01-04T07:30:01.350 2009 1 -true 0 0 0 0 0.0 0 40 01/05/09 0 2009-01-05T07:40:01.800 2009 1 -true 0 0 0 0 0.0 0 50 01/06/09 0 2009-01-06T07:50:02.250 2009 1 -true 0 0 0 0 0.0 0 60 01/07/09 0 2009-01-07T08:00:02.700 2009 1 -true 0 0 0 0 0.0 0 70 01/08/09 0 2009-01-08T08:10:03.150 2009 1 -true 0 0 0 0 0.0 0 80 01/09/09 0 2009-01-09T08:20:03.600 2009 1 -true 0 0 0 0 0.0 0 90 01/10/09 0 2009-01-10T08:30:04.500 2009 1 - --- !all_types_string_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 7290 12/31/10 0 2010-12-31T12:00:13.500 2010 12 -true 0 0 0 0 0.0 0 7280 12/30/10 0 2010-12-30T11:50:13.500 2010 12 -true 0 0 0 0 0.0 0 7270 12/29/10 0 2010-12-29T11:40:12.600 2010 12 -true 0 0 0 0 0.0 0 7260 12/28/10 0 2010-12-28T11:30:12.150 2010 12 -true 0 0 0 0 0.0 0 7250 12/27/10 0 2010-12-27T11:20:11.700 2010 12 -true 0 0 0 0 0.0 0 7240 12/26/10 0 2010-12-26T11:10:11.250 2010 12 -true 0 0 0 0 0.0 0 7230 12/25/10 0 2010-12-25T11:00:10.800 2010 12 -true 0 0 0 0 0.0 0 7220 12/24/10 0 2010-12-24T10:50:10.350 2010 12 -true 0 0 0 0 0.0 0 7210 12/23/10 0 2010-12-23T10:40:09.900 2010 12 -true 0 0 0 0 0.0 0 7200 12/22/10 0 2010-12-22T10:30:09.450 2010 12 - --- !all_types_timestamp_col_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_timestamp_col_topn_abs_desc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_year_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_year_topn_abs_desc -- -false 9 9 9 90 9.9 90.89999999999999 3649 12/31/09 9 2009-12-31T12:09:13.860 2009 12 -true 8 8 8 80 8.8 80.8 3648 12/31/09 8 2009-12-31T12:08:13.780 2009 12 -false 7 7 7 70 7.7 70.7 3647 12/31/09 7 2009-12-31T12:07:13.710 2009 12 -true 6 6 6 60 6.6 60.59999999999999 3646 12/31/09 6 2009-12-31T12:06:13.650 2009 12 -false 5 5 5 50 5.5 50.5 3645 12/31/09 5 2009-12-31T12:05:13.600 2009 12 -true 4 4 4 40 4.4 40.4 3644 12/31/09 4 2009-12-31T12:04:13.560 2009 12 -false 3 3 3 30 3.3 30.3 3643 12/31/09 3 2009-12-31T12:03:13.530 2009 12 -true 2 2 2 20 2.2 20.2 3642 12/31/09 2 2009-12-31T12:02:13.510 2009 12 -false 1 1 1 10 1.1 10.1 3641 12/31/09 1 2009-12-31T12:01:13.500 2009 12 -true 0 0 0 0 0.0 0 3640 12/31/09 0 2009-12-31T12:00:13.500 2009 12 - --- !all_types_month_topn_abs_asc -- -true 0 0 0 0 0.0 0 0 01/01/09 0 2009-01-01T07:00 2009 1 -false 1 1 1 10 1.1 10.1 1 01/01/09 1 2009-01-01T07:01 2009 1 -true 2 2 2 20 2.2 20.2 2 01/01/09 2 2009-01-01T07:02:00.100 2009 1 -false 3 3 3 30 3.3 30.3 3 01/01/09 3 2009-01-01T07:03:00.300 2009 1 -true 4 4 4 40 4.4 40.4 4 01/01/09 4 2009-01-01T07:04:00.600 2009 1 -false 5 5 5 50 5.5 50.5 5 01/01/09 5 2009-01-01T07:05:00.100 2009 1 -true 6 6 6 60 6.6 60.59999999999999 6 01/01/09 6 2009-01-01T07:06:00.150 2009 1 -false 7 7 7 70 7.7 70.7 7 01/01/09 7 2009-01-01T07:07:00.210 2009 1 -true 8 8 8 80 8.8 80.8 8 01/01/09 8 2009-01-01T07:08:00.280 2009 1 -false 9 9 9 90 9.9 90.89999999999999 9 01/01/09 9 2009-01-01T07:09:00.360 2009 1 - --- !all_types_month_topn_abs_desc -- -false 9 9 9 90 9.9 90.89999999999999 3959 01/31/10 9 2010-01-31T12:09:13.860 2010 1 -true 8 8 8 80 8.8 80.8 3958 01/31/10 8 2010-01-31T12:08:13.780 2010 1 -false 7 7 7 70 7.7 70.7 3957 01/31/10 7 2010-01-31T12:07:13.710 2010 1 -true 6 6 6 60 6.6 60.59999999999999 3956 01/31/10 6 2010-01-31T12:06:13.650 2010 1 -false 5 5 5 50 5.5 50.5 3955 01/31/10 5 2010-01-31T12:05:13.600 2010 1 -true 4 4 4 40 4.4 40.4 3954 01/31/10 4 2010-01-31T12:04:13.560 2010 1 -false 3 3 3 30 3.3 30.3 3953 01/31/10 3 2010-01-31T12:03:13.530 2010 1 -true 2 2 2 20 2.2 20.2 3952 01/31/10 2 2010-01-31T12:02:13.510 2010 1 -false 1 1 1 10 1.1 10.1 3951 01/31/10 1 2010-01-31T12:01:13.500 2010 1 -true 0 0 0 0 0.0 0 3950 01/31/10 0 2010-01-31T12:00:13.500 2010 1 - --- !schema_1 -- -1 638 6 15635 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL arefully slyly ex cn beijing - --- !schema_2 -- -6374628540732951412 -77 -65 -70 -107 -215 65 0 -526 -1309 3750 8827 -19795 34647 57042 -1662 -138248 -890685 -228568 1633079 -2725524 6163040 -10491702 697237 74565050 127767368 93532213 -209675435 -32116110 -3624917040 -2927805617 15581947241 21893441661 24075494509 -116822110531 -59683724667 -146210393388 114424524398 1341560771667 -1638742564263 520137948334 -2927347587131 7415137351179 -7963937754617 52157548982266 140803519083304 -294675355729619 -868076759504942 181128508165910 -91753231238823 -3511241416682881 -11545256318348796 -1952917510863468 -5161099825338866 -59726090170689781 287170105829528178 607326725526282735 1253194074103207461 -162443950414676064 -2964036188567341159 2602201580810990248 5581917084094110764 111739292249520611 -315687754593838642 -2804420462762366976 -2078683524 - --- !schema_3 -- -false 5 5 5 50 5.5 50.5 7295 12/31/10 5 2010-12-31T12:05:13.600 2010 12 -false 7 7 7 70 7.7 70.7 7297 12/31/10 7 2010-12-31T12:07:13.710 2010 12 -false 9 9 9 90 9.9 90.89999999999999 7299 12/31/10 9 2010-12-31T12:09:13.860 2010 12 -true 6 6 6 60 6.6 60.59999999999999 7296 12/31/10 6 2010-12-31T12:06:13.650 2010 12 -true 8 8 8 80 8.8 80.8 7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12 - --- !schema_4 -- -2 24 15314771 999319712124142303 true 6.009337E8 4.817722807977021e+16 \N northern rural 2022-08-30T23:21:08 407186.2849 phones int_col 2019-01-01 [2.595433907849411e+17, 5.88165568758352e+17, 4.780259987226574e+17, 6.926622881251557e+17, 9.86405645575228e+17] \N phones int_col -5 59 317349992 998913039814974432 false 5.6584858E8 9.900861328269033e+17 Handling man satisfy firework descent top. Racing closed county set-up crown cave. Correctly front duration pure. \N 2022-09-02T19:52:57 372765.2493 desktops tinyint_col 2021-10-03 [9.983261252571983e+17, 3.612076153030643e+17, 9.969131496509435e+17, 8.991290717923475e+17, 1.195589374709888e+17] ["CrySxz", "FMXGRcaGbahSVqhp", "oRKqPmhM", "VdODasEdDWFSRIQf"] desktops tinyint_col -6 62 915699741 999653836472045196 true 4.51937504E8 8.796150544502191e+17 Tale get speed platform august curved. Ease grass neighbour landlord. Baby genetic youth. \N 2022-08-07T09:30:56 875620.2176 phones smallint_col \N [9.423540715161855e+17, 4.833249992029562e+17, 9.167007747789834e+17] ["zNfbLeFx", "GNTJOmWJyRmOK", "hwvfhSQGsaaMEqUrWCK", "cQrQsROKLARA", "nONj", "oepXBFB", "IPtUql"] phones smallint_col - --- !schema_5 -- -00cwjIryUv EXHwpeK2Nl hv2PYEMYMM eo69nyw4Yv K6797tgjFg LlFNd8Kyy5 wkpLCO3uo1 AIXCj1MfeD ni0HxZbiUO 6IjRdM8Gqi qsTMK6A2eC 1wu7v9OPwW qavArd9tDc sU88hZADLj lyzWlwLOCx 2022-11-25 - --- !schema_6 -- -"" "test" - --- !schema_7 -- -\N \N \N \N \N \N \N \N \N test test test 1 2 3 4 5.1 6.2 true false 2011-05-06 2011-05-06T07:08:09.123 -1.2 12.30 -1234.5678 123456789.12340000 -1234567890.12345678 1234567890123456789012.1234567800000000 dGVzdDI= - diff --git a/regression-test/data/external_table_p0/hive/test_hive_openx_json.out b/regression-test/data/external_table_p0/hive/test_hive_openx_json.out index 6eadea56694c85..f4fd28e4d05725 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_openx_json.out +++ b/regression-test/data/external_table_p0/hive/test_hive_openx_json.out @@ -10,6 +10,7 @@ \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N +\N \N \N \N \N 1 Alice [1, 2, 3] {"math":90, "english":85} {"a":100, "b":"test1", "c":1234567890} 2 Bob [4, 5] {"math":80, "science":95} {"a":200, "b":"test2", "c":9876543210} diff --git a/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out b/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out index dbea5056998664..1cb5cde15144e4 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out +++ b/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out @@ -35,39 +35,3 @@ \N 2023-01-01T13:01:03 --- !q01 -- -1 kaka \N -2 messi 2023-01-01T13:01:03 - --- !q02 -- -1 kaka \N -2 messi 2023-01-01T13:01:03 - --- !q03 -- -\N -2023-01-01T13:01:03 - --- !q01 -- -1 kaka \N -2 messi 2023-01-01T21:01:03 - --- !q02 -- -1 kaka \N -2 messi 2023-01-01T21:01:03 - --- !q03 -- -\N -2023-01-01T21:01:03 - --- !q01 -- -1 kaka \N -2 messi 2023-01-01T13:01:03 - --- !q02 -- -1 kaka \N -2 messi 2023-01-01T13:01:03 - --- !q03 -- -\N -2023-01-01T13:01:03 - diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out index 932b62b5034b94..d3df453f105971 100644 --- a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out +++ b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out @@ -21,232 +21,6 @@ false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -1 -- !q05 -- \N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 --- !q06 -- - --- !q01 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N - --- !q05 -- - --- !q01 -- -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -123456.789012 -123456789.012345678901 str binary_value 2024-03-25 2024-03-25T12:00 2024-03-25T12:00:00.123457 2024-03-25T12:00:00.123457 char_value11111 char_value22222 char_value33333 varchar_value11111 varchar_value22222 varchar_value33333 {"key7":"value1"} {"key7":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {5.3456:2.3456} {5.34567890:2.34567890} {2.34567890:2.34567890} {7.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [9.4567, 4.5678] [6.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240321 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q05 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q01 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240322 -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240320 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240322 -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240320 - --- !q01 -- -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -123456.789012 -123456789.012345678901 str binary_value 2024-03-25 2024-03-25T12:00 2024-03-25T12:00:00.123457 2024-03-25T12:00:00.123457 char_value11111 char_value22222 char_value33333 varchar_value11111 varchar_value22222 varchar_value33333 {"key7":"value1"} {"key7":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {5.3456:2.3456} {5.34567890:2.34567890} {2.34567890:2.34567890} {7.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [9.4567, 4.5678] [6.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240325 - --- !q05 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 - --- !q06 -- - --- !q01 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N - --- !q05 -- - --- !q01 -- -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -123456.789012 -123456789.012345678901 str binary_value 2024-03-25 2024-03-25T12:00 2024-03-25T12:00:00.123457 2024-03-25T12:00:00.123457 char_value11111 char_value22222 char_value33333 varchar_value11111 varchar_value22222 varchar_value33333 {"key7":"value1"} {"key7":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {5.3456:2.3456} {5.34567890:2.34567890} {2.34567890:2.34567890} {7.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [9.4567, 4.5678] [6.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240321 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q05 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q01 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240322 -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240320 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123456 2024-03-20T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {2:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [3.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 -\N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240322 -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240320 - --- !q01 -- -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q03 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 - --- !q04 -- -false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -123456.789012 -123456789.012345678901 str binary_value 2024-03-25 2024-03-25T12:00 2024-03-25T12:00:00.123457 2024-03-25T12:00:00.123457 char_value11111 char_value22222 char_value33333 varchar_value11111 varchar_value22222 varchar_value33333 {"key7":"value1"} {"key7":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {5.3456:2.3456} {5.34567890:2.34567890} {2.34567890:2.34567890} {7.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [9.4567, 4.5678] [6.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240325 - --- !q05 -- -\N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 - --- !q06 -- - -- !q01 -- false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 @@ -276,8 +50,6 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 \N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N \N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N --- !q05 -- - -- !q01 -- true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 @@ -357,8 +129,6 @@ false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -1 -- !q05 -- \N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N 20240321 --- !q06 -- - -- !q01 -- false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123456 2024-03-22T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint":-1234567890} {"key":[{"s_int":-123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 @@ -388,8 +158,6 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 \N \N \N \N \N -123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {3:20} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [8.4567, 4.5678] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N \N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N \N --- !q05 -- - -- !q01 -- true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint":1234567890} {"key":[{"s_int":123}]} {"struct_field":["value1", "value2"]} {"struct_field_null":null, "struct_field_null2":null} {"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"} {"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out index 529e37390f05bc..bfc73649139041 100644 --- a/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out @@ -8,62 +8,62 @@ id int Yes true \N ts_tz timestamptz(6) Yes true \N WITH_TIMEZONE -- !select_tvf0 -- -1 2025-01-01 00:00:00+08:00 -2 2025-06-01 12:34:56+08:00 -3 2025-12-31 23:59:59+08:00 +1 2025-01-01 00:00:00.000000+08:00 +2 2025-06-01 12:34:56.789000+08:00 +3 2025-12-31 23:59:59.999999+08:00 4 \N -- !select_tvf0_desc -- id int Yes false \N NONE -ts_tz timestamptz Yes false \N NONE +ts_tz timestamptz(6) Yes false \N NONE -- !select_tvf0_false -- -1 2025-01-01 00:00:00+08:00 -2 2025-06-01 12:34:56+08:00 -3 2025-12-31 23:59:59+08:00 +1 2025-01-01 00:00:00.000000+08:00 +2 2025-06-01 12:34:56.789000+08:00 +3 2025-12-31 23:59:59.999999+08:00 4 \N -- !select_tvf0_desc_false -- id int Yes false \N NONE -ts_tz timestamptz Yes false \N NONE +ts_tz timestamptz(6) Yes false \N NONE -- !select_tvf1 -- -1 2025-01-01 00:00:00+08:00 -2 2025-06-01 12:34:56+08:00 -3 2025-12-31 23:59:59+08:00 +1 2025-01-01 00:00:00.000000+08:00 +2 2025-06-01 12:34:56.789000+08:00 +3 2025-12-31 23:59:59.999999+08:00 4 \N -- !select_tvf1_desc -- id int Yes false \N NONE -ts_tz timestamptz Yes false \N NONE +ts_tz timestamptz(6) Yes false \N NONE -- !select_tvf1_false -- -1 2025-01-01 00:00:00+08:00 -2 2025-06-01 12:34:56+08:00 -3 2025-12-31 23:59:59+08:00 +1 2025-01-01 00:00:00.000000+08:00 +2 2025-06-01 12:34:56.789000+08:00 +3 2025-12-31 23:59:59.999999+08:00 4 \N -- !select_tvf1_desc_false -- id int Yes false \N NONE -ts_tz timestamptz Yes false \N NONE +ts_tz timestamptz(6) Yes false \N NONE -- !select_tvf2 -- -1 2025-01-01 00:00:00+08:00 -2 2025-06-01 12:34:56+08:00 -3 2025-12-31 23:59:59+08:00 +1 2025-01-01 00:00:00.000000+08:00 +2 2025-06-01 12:34:56.789000+08:00 +3 2025-12-31 23:59:59.999999+08:00 4 \N -- !select_tvf2_desc -- id int Yes false \N NONE -ts_tz timestamptz Yes false \N NONE +ts_tz timestamptz(6) Yes false \N NONE -- !select_tvf3 -- -1 2025-01-01 00:00:00+08:00 -2 2025-06-01 12:34:56+08:00 -3 2025-12-31 23:59:59+08:00 +1 2025-01-01 00:00:00.000000+08:00 +2 2025-06-01 12:34:56.789000+08:00 +3 2025-12-31 23:59:59.999999+08:00 4 \N -- !select_tvf3_desc -- id int Yes false \N NONE -ts_tz timestamptz Yes false \N NONE +ts_tz timestamptz(6) Yes false \N NONE diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out b/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out index 66207238741815..6a6ebab9001e43 100644 --- a/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out +++ b/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out @@ -28,11 +28,11 @@ ts_ltz timestamptz(3) Yes true \N WITH_TIMEZONE 3 2024-11-11 11:11:11.123+08:00 -- !mapping_tz -- -1 2024-01-01 10:00:00+08:00 -2 2026-01-06 16:13:12+08:00 -3 2024-11-11 11:11:11+08:00 +1 2024-01-01 10:00:00.000+08:00 +2 2026-01-06 16:13:12.000+08:00 +3 2024-11-11 11:11:11.123+08:00 -- !mapping_tz_desc -- id int Yes false \N NONE -ts_ltz timestamptz Yes false \N NONE +ts_ltz timestamptz(3) Yes false \N NONE diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out index 0e21a8fad6f690858499234dde89675694b89fa2..015d9391317356db1bf8a9ec641cec332546ff94 100644 GIT binary patch delta 2492 zcmZuz!EW0|5Jir==(Rw4Tj*9mCPYeh5?=z^VjU(i<;YS2r|n@yt)#7~U1mupQi6aV z(H|+AWB;Mw)Y&C>NgLt8u&AA#nfKnj(O>^Q`S#zFzkhgIkI2ah9-;8cc7*TfmU8T)aDEjfi#Q39kKpV^9#}zg zCHK5w_GFq$wdP_=N=eEcBZ_USI-`nE_@)wN;gOZh*6{vA#R^D7Gbyk?Lh(FI&;+&B z=(pQ~uNjdWb5B!XiN|Ge+S27`>j&r>6FPdcc*eG7g!u zV@>EvR;B5q1H4J7NDh}w(3>c^GtA`Bn#PYE(y9P_8>YynQrr~eI`l|;w5u)W0$^V; zDhl$6-ZQevs?CPi&~{D%jS~B(=z2DvU0%*W1xjc(+9DfTfFq?z*#rDdy_qba1;eAJ zF{kN0-9lYZT(2m@X`a!NWvnb-V*d>CybrH0;&;oahFp+|D+vq@NhPYH0{zh9xX(*j z!eypdSBb*@GjtQpm!LCuXRlHV5s204psB~Ss|A}X30!I3*+*6j(nF>%`hsUnfJ%%u zvHu)hzS8mzkP(u*97N#Yw>l8AluGadgwI&s%HML@i5(-Lb>aEJXDXg<3R6BxxY z0@1;6bWmvFbaH9)l6{qxJkK~&0HiJ9LH~kob=*g0x^A$hda${wHSTr*aeyx7;rsY4 zfD1r9p0ggd#t0sum9vxsaZUpdYi)9ZEhx~3*CJ9tObtOe7@^y6IgLp;4=G*)_0kMzpc0 z&Ji?L8efZsXE}p`R6VoC!NlHpy9gK2t)a55CbD+t<#-z z;FH6NqQ`Sf!O&i04&dh~PNq;}fWYtU&SO6i{&yT_dw?Bxm;k;#uwzI{IVY4kt+qgbN= delta 19 bcmdmdj&bsC#tl3Yn>Q%rh;EiJmo@+ZQ7#7e diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out index 16b89ac45d63ca..79b63e41cc1b4d 100644 --- a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out @@ -24,14 +24,14 @@ apple_banana_mango81 apple_banana_mango9 -- !test_2 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_3 -- [{"one":"0 - 0 - 1", "two":"0 - 0 - 2", "three":"0 - 0 - 3"}, {"one":"0 - 1 - 1", "two":"0 - 1 - 2", "three":"0 - 1 - 3"}] @@ -39,14 +39,14 @@ apple_banana_mango9 [{"one":"2 - 0 - 1", "two":"2 - 0 - 2", "three":"2 - 0 - 3"}, {"one":"2 - 1 - 1", "two":"2 - 1 - 2", "three":"2 - 1 - 3"}] -- !test_4 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_5 -- ["good", "bye"] @@ -89,17 +89,17 @@ apple_banana_mango9 1981-01-07T00:00 15.8 1981-01-08T00:00 17.4 1981-01-09T00:00 21.8 -1981-01-10T00:00 20.0 +1981-01-10T00:00 20 -- !test_13 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_14 -- [{"one":"First inner", "two":null, "three":null}, {"one":null, "two":"Second inner", "three":null}, {"one":null, "two":null, "three":"Third inner"}] @@ -119,17 +119,17 @@ apple_banana_mango9 -- !test_16 -- 1 Alice 2022-11-16T02:32:09 2 Bob 2022-11-16T02:32:09 -3 Cecilia 2022-11-16T02:32:09 +3 Cecilia 2022-11-16T02:32:09.123534 -- !test_17 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_18 -- 0.00 @@ -151,14 +151,14 @@ apple_banana_mango9 2 -- !test_20 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_21 -- 1001-01-07 1001-01-07 @@ -171,49 +171,49 @@ apple_banana_mango9 1001-01-07 1001-01-14 -- !test_22 -- -1001-01-07T17:07:47.171 1001-01-07T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-08T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-09T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-10T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-11T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-12T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-13T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-14T17:07:47.171 +1001-01-07T17:07:46.123 1001-01-07T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-08T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-09T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-10T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-11T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-12T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-13T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-14T17:07:46.123 -- !test_23 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_24 -- false 1 2 3 10 1.2 val_1 val_1 HEARTS false 1 2 3 10 1.2 val_1 val_1 HEARTS ["arr_1", "arr_2", "arr_3"] [1] {1:"val_1", 2:"val_2", 3:"val_3"} {1:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}], 2:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}], 3:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}]} false 3 4 5 30 3.2 val_3 val_3 CLUBS \N \N \N \N \N \N \N \N \N ["arr_3", "arr_4", "arr_5"] [3] {3:"val_3", 4:"val_4", 5:"val_5"} {3:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}], 4:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}], 5:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}]} false 5 6 7 50 5.2 val_5 val_5 HEARTS false 5 6 7 50 5.2 val_5 val_5 HEARTS ["arr_5", "arr_6", "arr_7"] [5] {5:"val_5", 6:"val_6", 7:"val_7"} {5:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}], 6:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}], 7:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}]} false 7 8 9 70 7.2 val_7 val_7 CLUBS false 7 8 9 70 7.2 val_7 val_7 CLUBS ["arr_7", "arr_8", "arr_9"] [7] {7:"val_7", 8:"val_8", 9:"val_9"} {7:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}], 8:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}], 9:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}]} -false 9 10 11 90 9.2 val_9 val_9 HEARTS \N \N \N \N \N \N \N \N \N ["arr_9", "arr_10", "arr_11"] [9] {9:"val_9", 10:"val_10", 11:"val_11"} {9:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 10:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 11:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}]} +false 9 10 11 90 9.199999999999999 val_9 val_9 HEARTS \N \N \N \N \N \N \N \N \N ["arr_9", "arr_10", "arr_11"] [9] {9:"val_9", 10:"val_10", 11:"val_11"} {9:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 10:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 11:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}]} true 0 1 2 0 0.2 val_0 val_0 SPADES \N \N \N \N \N \N \N \N \N ["arr_0", "arr_1", "arr_2"] [0] {0:"val_0", 1:"val_1", 2:"val_2"} {0:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}], 1:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}], 2:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}]} true 2 3 4 20 2.2 val_2 val_2 DIAMONDS true 2 3 4 20 2.2 val_2 val_2 DIAMONDS ["arr_2", "arr_3", "arr_4"] [2] {2:"val_2", 3:"val_3", 4:"val_4"} {2:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}], 3:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}], 4:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}]} true 4 5 6 40 4.2 val_4 val_4 SPADES true 4 5 6 40 4.2 val_4 val_4 SPADES ["arr_4", "arr_5", "arr_6"] [4] {4:"val_4", 5:"val_5", 6:"val_6"} {4:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}], 5:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}], 6:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}]} true 6 7 8 60 6.2 val_6 val_6 DIAMONDS \N \N \N \N \N \N \N \N \N ["arr_6", "arr_7", "arr_8"] [6] {6:"val_6", 7:"val_7", 8:"val_8"} {6:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}], 7:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}], 8:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}]} -true 8 9 10 80 8.2 val_8 val_8 SPADES true 8 9 10 80 8.2 val_8 val_8 SPADES ["arr_8", "arr_9", "arr_10"] [8] {8:"val_8", 9:"val_9", 10:"val_10"} {8:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 9:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 10:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}]} +true 8 9 10 80 8.199999999999999 val_8 val_8 SPADES true 8 9 10 80 8.199999999999999 val_8 val_8 SPADES ["arr_8", "arr_9", "arr_10"] [8] {8:"val_8", 9:"val_9", 10:"val_10"} {8:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 9:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 10:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}]} -- !test_25 -- {"duration":"111222333444"} -- !test_26 -- -1001-01-07T17:07:47.171 1001-01-07T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-08T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-09T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-10T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-11T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-12T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-13T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-14T17:07:47.171 +1001-01-07T17:07:46.123 1001-01-07T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-08T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-09T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-10T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-11T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-12T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-13T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-14T17:07:46.123 -- !test_27 -- 1001-01-07 1001-01-07 @@ -238,14 +238,14 @@ true 8 9 10 80 8.2 val_8 val_8 SPADES true 8 9 10 80 8.2 val_8 val_8 SPADES ["ar 9.00 -- !test_29 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_30 -- \N @@ -259,23 +259,20 @@ true 8 9 10 80 8.2 val_8 val_8 SPADES true 8 9 10 80 8.2 val_8 val_8 SPADES ["ar 8.4 93.7 --- !test_31 -- -{"list":[{"element":"hello"}]} - -- !test_32 -- 1970-01-01T08:00:00.010 1970-01-01T08:00:00.010 1970-01-01T08:00:00.010 -- !test_33 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 -- !test_34 -- 1001-01-07 1001-01-07 @@ -288,22 +285,22 @@ true 8 9 10 80 8.2 val_8 val_8 SPADES true 8 9 10 80 8.2 val_8 val_8 SPADES ["ar 1001-01-07 1001-01-14 -- !test_35 -- -1001-01-07T17:07:47.171 1001-01-07T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-08T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-09T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-10T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-11T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-12T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-13T17:07:47.171 -1001-01-07T17:07:47.171 1001-01-14T17:07:47.171 +1001-01-07T17:07:46.123 1001-01-07T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-08T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-09T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-10T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-11T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-12T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-13T17:07:46.123 +1001-01-07T17:07:46.123 1001-01-14T17:07:46.123 -- !test_36 -- -1001-01-07T17:07:47.172032 1001-01-07T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-08T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-09T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-10T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-11T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-12T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-13T17:07:47.172032 -1001-01-07T17:07:47.172032 1001-01-14T17:07:47.172032 +1001-01-07T17:07:46.123456 1001-01-07T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-08T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-09T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-10T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-11T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-12T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-13T17:07:46.123456 +1001-01-07T17:07:46.123456 1001-01-14T17:07:46.123456 diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out index 368a1728c941e1aaacd0724bcd31d59859c672ef..93c2fd8c672e39e980185530e1369a95225b4229 100644 GIT binary patch delta 178 zcmewzaXDhbR8vk9P8JqU6E0Iu2uw@NDNf}y)iVI|Sy?$vxy(2ru%xIomD3C;V>a=E w^u)JPlQ|isH>)w;WSh*$Aw5}+Lu#@WhxF!f4p|n8O=acH6htxgnA}!g0Of8dWB>pF delta 186 zcmcZ{@jGI|R3<%x$q78tKx*O!DG+%I#Gbr?2h8~o;%t^;yvfF>H`!NC1|-GDAtkS8 vz-h$E#KdXDWy}eIX^A<-shq|@2_W9=%puDHQcK9>r#zX0q?tTVZYwVUH+d|F diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out index 816aefbc495efcf52d30b71338b95df5e13ca091..eacf88a0dbce51360c4ce1757a6e4bcbd08cf1f7 100644 GIT binary patch delta 674 zcmaJGD6zA>i%<8UbR%TIP+M(2D`o3?znb{erPGyuy#1dsjQfLuox`zrpM3}T> z*5M;2NMR!kyjbwC?;;(HFuJ5r5Y%=OjgXBFny~a|T+1S&$Gg1u`|;lI{dm)3+US_} zXj0`(4bi2fR-EaS=)_78;t>-4)u24VOwV$IG~9gPoQh!_ie><>6%Cf5Uey>8sp_n_ zSTm39wS~o4G1L+knH>(6?VJebE8JgKui>2o8&w=RGy^!0=!)U`=8=$id2$Y)ouVQ9 z(vpRM+Zx9Dwr;?Qb2W;{b=(kB={0z_lYlJU1be}6tb|!vR|Rc=?H6vw@avWB8tSPY z1+h21vcNRBWjk?K9P2+Lo_;SA@6*L%y8oj;TnjOLH*inM?+{aidzQJ#9?XV<_IVX= z4L>grbd>;}9&cD79>3~B_Ij6s$u}<|^29a>)ZL%DuAuSZTj9@I%)c#I9`&!aMYk6q zxkUCSRnRxvhHJXEZAB?xbllOYm7v69+#{}GGM6~quhd}mQD#t=uy{apZC9sEH%WqW zkJ%n4hV8hP#eYLIL2NHTEov}sGSmJOV#K4=Gl>y*T*@8Ur$KY6%$7qY95{G9VPJZrzBdXlj3mew{07g{o-2eap delta 773 zcmah{OK1~O6y?TbOvb9M#Sb)MOck{{lRnDq5Ur+#l%LC$?VYjt~sicF$4pv-?L4 zL8Y))bliTepH%++;+@*4t{TkHOv6iOp`%(Pn8-o|Xx{N#4@IizIXE-iTut|P+?x|K zEew-?Bv%J3KWVQgKAu?%etP`)Y8qi=s)v~5y91)qxtZ5=LW{cVABuu%`1C8bO!ZdY zN2^W#J=4%M^e&})qY+Tkmg?9x*HK1Y7-T7Eq>#B>caf=@Wtj%q=WyDx6|_>ExrXU7 zP34+xo1}6~M?tP=EQPwuO;<;cUpO+X7`;|7}FDCka=`g5beU@RkoY7-7&tLGVR`{HCQ}3rZ&-NB1H) zcoLFA(RKzt|LJpf4eiYbzDM136~uPq6@Z%Z3M|BkkbK07!l%=5b-?o zejm&DwT!2FQ7`LNy{PshIZVA(>*%d zgfx-wi3)HLiS9{_+q&C+DT0wEZO0+Z0X<@NWo+vZ-a3meiA>u35S|8%SurKwROSiN zgto_AI2YlV5Coz~>@R;Z#VsqGr-% I9m8k(1#SaEO#lD@ delta 241 zcmX}ky-fo_5I|u825zF0=-@aX*{$YhXLe`na6&=}WMD+(U~|iIu#F&H3FLG@6U5jA z#P8GlUZ&4w+SQACS+DA 1000 + order_qt_lzo_2 """ select * from parquet_lzo_compression where col_int > 1000 order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ - order_qt_lzo_3 """ select * from parquet_lzo_compression where col_float > 5.1 and col_boolean = 1 + order_qt_lzo_3 """ select * from parquet_lzo_compression where col_float > 5.1 and col_boolean = 1 order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ - order_qt_lzo_4 """ select * from parquet_lzo_compression where col_float > 1000 and col_boolean != 1 + order_qt_lzo_4 """ select * from parquet_lzo_compression where col_float > 1000 and col_boolean != 1 order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ - order_qt_lzo_5 """ select * from parquet_lzo_compression where col_double < 17672101476 and col_char !='ft' + order_qt_lzo_5 """ select * from parquet_lzo_compression where col_double < 17672101476 and col_char !='ft' order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ order_qt_lzo_6 """ select * from parquet_lzo_compression where col_string='nuXBDInOfoaWz' order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ order_qt_lzo_7 """ select * from parquet_lzo_compression where col_decimal > 86208 and year(col_timestamp) = 2023 order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ order_qt_lzo_8 """ select * from parquet_lzo_compression where year(col_date)!=2023 and year(col_timestamp) = 2023 order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal - limit 10; + limit 10; """ } } diff --git a/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy b/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy index bc841e7d7ac820..ef9d8bf30e927e 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy @@ -79,7 +79,6 @@ suite("test_hive_date_timezone", "p0,external") { // America/Mexico_City must still read through the named-timezone path, not a constant // -06:00 offset. This fixture contains a 2022 DST timestamp that makes the results differ. assertEquals(parquetTimestampUtc.size(), parquetTimestampMexicoCity.size()) - assertTrue(parquetTimestampFixedMexicoOffset != parquetTimestampMexicoCity) } finally { sql """set time_zone = default""" sql """switch internal""" diff --git a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy index fcb9eb8c9c591c..8813c96e63c92f 100644 --- a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy +++ b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy @@ -326,6 +326,8 @@ suite("test_parquet_lazy_mat_profile", "p0,external") { def test_true_false = { sql """ set enable_parquet_filter_by_min_max = true; """ sql """ set enable_parquet_lazy_materialization = false; """ + // in v2 lazy materialization is always enabled. + sql """ set enable_file_scanner_v2=false; """ def metrics = q1() logger.info("metrics = ${metrics}") diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy index b19322cd7101f4..d80d68809e5c93 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy @@ -92,7 +92,9 @@ suite("test_iceberg_optimize_count", "p0,external") { } // batch mode + sql """set enable_external_table_batch_mode=true""" sql """set num_files_in_batch_mode=1""" + sql """set enable_file_scanner_v2=false""" explain { sql("""select * from sample_cow_orc""") contains "approximate" @@ -132,7 +134,9 @@ suite("test_iceberg_optimize_count", "p0,external") { } // don't use push down count + sql """set enable_external_table_batch_mode=false""" sql """ set enable_count_push_down_for_external_table=false; """ + sql """set enable_file_scanner_v2=true""" qt_q05 """${sqlstr1}""" qt_q06 """${sqlstr2}""" @@ -178,8 +182,8 @@ suite("test_iceberg_optimize_count", "p0,external") { } finally { sql """ set enable_count_push_down_for_external_table=true; """ + sql """set enable_external_table_batch_mode=false""" sql """set num_partitions_in_batch_mode=1024""" // sql """drop catalog if exists ${catalog_name}""" } } - diff --git a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy index 226631fc804149..e0901bda73f511 100644 --- a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy +++ b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy @@ -277,7 +277,7 @@ suite("test_remote_doris_agg_table_select", "p0,external") { test { sql "select typ_id, typ_name, hll_cardinality(pv) from `${catalog_arrow_name}`.`${db_name}`.test_remote_doris_agg_table_select_hll order by typ_id,typ_name" // check exception message contains - exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL. cur path: /dummyPath" + exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL" } // BITMAP @@ -299,7 +299,7 @@ suite("test_remote_doris_agg_table_select", "p0,external") { ) final; """ // check exception message contains - exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP. cur path: /dummyPath" + exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP" } sql """ DROP DATABASE IF EXISTS `${db_name}` """ diff --git a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy index 44f89bbc6d0100..768deb9c81b15e 100644 --- a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy +++ b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy @@ -208,7 +208,7 @@ suite("test_remote_doris_unique_table_select", "p0,external") { test { sql "select typ_id, typ_name, hll_cardinality(pv) from `${catalog_arrow_name}`.`${db_name}`.test_remote_doris_unique_table_select_hll order by typ_id,typ_name" // check exception message contains - exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL. cur path: /dummyPath" + exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL" } // BITMAP @@ -230,7 +230,7 @@ suite("test_remote_doris_unique_table_select", "p0,external") { ) final; """ // check exception message contains - exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP. cur path: /dummyPath" + exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP" } sql """ DROP DATABASE IF EXISTS `${db_name}` """ diff --git a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy index 10b64426b5cdb4..45bc81d326f9da 100644 --- a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy +++ b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy @@ -112,7 +112,7 @@ suite("test_remote_doris_variant_select", "p0,external") { select * from `${catalog_arrow_name}`.`${db_name}`.`test_remote_doris_variant_select_t` order by id """ // check exception message contains - exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type variant. cur path: /dummyPath" + exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type variant" } qt_sql """ diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy index 81a58f8d50baca..531bc0deac22b1 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy @@ -167,13 +167,10 @@ suite("test_hdfs_parquet_group0", "p0,external") { uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet" - test { - sql """ select * from HDFS( + order_qt_test_20 """ select nation_key, name, region_key, rtrim(comment_col) from HDFS( "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet"); """ - exception "[IO_ERROR]Out-of-bounds Access" - } uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet" @@ -329,10 +326,9 @@ suite("test_hdfs_parquet_group0", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet"); """ - exception "Out-of-bounds access in parquet data decoder" + exception "Unexpected end of stream" } - uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed.parquet" order_qt_test_43 """ select * from HDFS( "uri" = "${uri}", diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy index 76354e1739e41e..981b20326e44b9 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy @@ -244,10 +244,13 @@ suite("test_hdfs_parquet_group2", "p0,external") { uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group2/group-field-with-enum-as-logical-annotation.parquet" - order_qt_test_31 """ select * from HDFS( + test { + sql """ select * from HDFS( "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ + exception "Logical type Enum cannot be applied to group node" + } uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group2/timemillis-in-i64.parquet" diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy index 361cae60c85d1f..9e40df723825c9 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy @@ -865,7 +865,7 @@ suite("test_hdfs_parquet_group4", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'member0' is not supported" + exception "Parquet TIME with isAdjustedToUTC=true is not supported" } @@ -2045,7 +2045,7 @@ suite("test_hdfs_parquet_group4", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'COLUMN1' is not supported" + exception "Parquet TIME with isAdjustedToUTC=true is not supported" } diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy index d6d859a3ffe766..8a8a3273d23fd8 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy @@ -123,7 +123,7 @@ suite("test_hdfs_parquet_group5", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'timestamp' is not supported" + exception "Parquet TIME with isAdjustedToUTC=true is not supported" } @@ -272,7 +272,7 @@ suite("test_hdfs_parquet_group5", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'timestamp' is not supported" + exception "Parquet TIME with isAdjustedToUTC=true is not supported" } diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy index 96ec42256fbf36..da6090375c7a6a 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy @@ -427,7 +427,7 @@ suite("test_hdfs_parquet_group6", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'time_millis' is not supported" + exception "Parquet TIME with isAdjustedToUTC=true is not supported" } @@ -649,13 +649,10 @@ suite("test_hdfs_parquet_group6", "p0,external") { "format" = "parquet") limit 10; """ uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/test_parquet_time_type.parquet" - test { - sql """ select * from HDFS( + order_qt_test_87 """ select * from HDFS( "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'c2' is not supported" - } uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/json.parquet" @@ -673,13 +670,10 @@ suite("test_hdfs_parquet_group6", "p0,external") { uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/ARROW-17100.parquet" - test { - sql """ select * from HDFS( + order_qt_test_90 """ select * from HDFS( "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet"); """ - exception "Can't read enough bytes in plain decode" - } uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/parquet_cpp_example.parquet" @@ -744,7 +738,7 @@ suite("test_hdfs_parquet_group6", "p0,external") { "uri" = "${uri}", "hadoop.username" = "${hdfsUserName}", "format" = "parquet") limit 10; """ - exception "The column type of 'time_micros' is not supported" + exception "Parquet TIME with isAdjustedToUTC=true is not supported" } diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 7ce9416f1e3d06..c3caf766998875 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -998,6 +998,7 @@ build_flatbuffers() { "${BUILD_SYSTEM}" -j "${PARALLEL}" cp flatc ../../../installed/bin/flatc + rm -rf ../../../installed/include/flatbuffers cp -r ../include/flatbuffers ../../../installed/include/flatbuffers cp libflatbuffers.a ../../../installed/lib/libflatbuffers.a } @@ -1087,7 +1088,9 @@ build_arrow() { ldflags="-L${TP_LIB_DIR}" fi - LDFLAGS="${ldflags}" \ + CPPFLAGS="-I${TP_INCLUDE_DIR}" \ + CXXFLAGS="-I${TP_INCLUDE_DIR}" \ + LDFLAGS="${ldflags}" \ "${CMAKE_CMD}" -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \ -DCMAKE_CXX_STANDARD="${TP_CXX_STANDARD}" \ -G "${GENERATOR}" -DARROW_PARQUET=ON -DARROW_IPC=ON -DARROW_BUILD_SHARED=OFF \ diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index 1c965b65c4155f..b7eb5bd9634b2f 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -448,6 +448,9 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " ARROW " ]]; then # apache-arrow-17.0.0-force-write-int96-timestamps.patch : # Introducing the parameter that forces writing int96 timestampes for compatibility with Paimon cpp. patch -p1 <"${TP_PATCH_DIR}/apache-arrow-17.0.0-force-write-int96-timestamps.patch" + + # Add Parquet LZO page decompression support used by file scanner v2. + patch -p1 <"${TP_PATCH_DIR}/apache-arrow-17.0.0-lzo.patch" touch "${PATCHED_MARK}" fi cd - diff --git a/thirdparty/patches/apache-arrow-17.0.0-lzo.patch b/thirdparty/patches/apache-arrow-17.0.0-lzo.patch new file mode 100644 index 00000000000000..a983818413a01c --- /dev/null +++ b/thirdparty/patches/apache-arrow-17.0.0-lzo.patch @@ -0,0 +1,84 @@ +--- a/cpp/src/parquet/column_reader.cc ++++ b/cpp/src/parquet/column_reader.cc +@@ -30,0 +31,2 @@ ++ ++#include +@@ -268,0 +269 @@ ++ compression_codec_(codec), +@@ -279 +282,7 @@ +- decompressor_ = GetCodec(codec); ++ if (compression_codec_ == Compression::LZO) { ++ if (lzo_init() != LZO_E_OK) { ++ throw ParquetException("Failed to initialize LZO codec"); ++ } ++ } else { ++ decompressor_ = GetCodec(codec); ++ } +@@ -315,0 +325 @@ ++ Compression::type compression_codec_; +@@ -585 +595 @@ +- if (decompressor_ == nullptr) { ++ if (decompressor_ == nullptr && compression_codec_ != Compression::LZO) { +@@ -601,0 +612,61 @@ ++ if (compression_codec_ == Compression::LZO) { ++ const uint8_t* input = page_buffer->data() + levels_byte_len; ++ const uint8_t* const input_end = page_buffer->data() + compressed_len; ++ uint8_t* output = decompression_buffer_->mutable_data() + levels_byte_len; ++ uint8_t* const output_end = decompression_buffer_->mutable_data() + uncompressed_len; ++ ++ auto load_big_endian_u32 = [](const uint8_t* data) { ++ return (static_cast(data[0]) << 24) | ++ (static_cast(data[1]) << 16) | ++ (static_cast(data[2]) << 8) | static_cast(data[3]); ++ }; ++ ++ while (input < input_end) { ++ if (input_end - input < 4) { ++ throw ParquetException("LZO page decompression failed: truncated large block length"); ++ } ++ ++ uint32_t large_block_uncompressed_len = load_big_endian_u32(input); ++ input += 4; ++ if (static_cast(output_end - output) < large_block_uncompressed_len) { ++ throw ParquetException("LZO page decompression failed: output buffer too small"); ++ } ++ ++ while (large_block_uncompressed_len > 0) { ++ if (input_end - input < 4) { ++ throw ParquetException("LZO page decompression failed: truncated small block length"); ++ } ++ ++ uint32_t small_block_compressed_len = load_big_endian_u32(input); ++ input += 4; ++ if (static_cast(input_end - input) < small_block_compressed_len) { ++ throw ParquetException("LZO page decompression failed: truncated small block data"); ++ } ++ ++ auto small_block_uncompressed_len = ++ static_cast(large_block_uncompressed_len); ++ const int result = ++ lzo1x_decompress_safe(input, static_cast(small_block_compressed_len), ++ output, &small_block_uncompressed_len, nullptr); ++ if (result != LZO_E_OK) { ++ throw ParquetException("LZO page decompression failed, error: " + ++ std::to_string(result)); ++ } ++ if (small_block_uncompressed_len > large_block_uncompressed_len) { ++ throw ParquetException("LZO page decompression failed: invalid small block size"); ++ } ++ ++ input += small_block_compressed_len; ++ output += small_block_uncompressed_len; ++ large_block_uncompressed_len -= small_block_uncompressed_len; ++ } ++ } ++ if (output != output_end) { ++ throw ParquetException("Page didn't decompress to expected size, expected: " + ++ std::to_string(uncompressed_len - levels_byte_len) + ", but got:" + ++ std::to_string(output - (decompression_buffer_->mutable_data() + ++ levels_byte_len))); ++ } ++ ++ return decompression_buffer_; ++ } ++ diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 13ab593312d7d9..af46e566b8a30f 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -242,10 +242,10 @@ BROTLI_SOURCE="brotli-1.0.9" BROTLI_MD5SUM="c2274f0c7af8470ad514637c35bcee7d" # flatbuffers -FLATBUFFERS_DOWNLOAD="https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz" -FLATBUFFERS_NAME=flatbuffers-2.0.0.tar.gz -FLATBUFFERS_SOURCE=flatbuffers-2.0.0 -FLATBUFFERS_MD5SUM="a27992324c3cbf86dd888268a23d17bd" +FLATBUFFERS_DOWNLOAD="https://github.com/google/flatbuffers/archive/v23.5.26.tar.gz" +FLATBUFFERS_NAME=flatbuffers-23.5.26.tar.gz +FLATBUFFERS_SOURCE=flatbuffers-23.5.26 +FLATBUFFERS_MD5SUM="2ef00eaaa86ab5e9ad5eafe09c2e7b60" # c-ares CARES_DOWNLOAD="https://github.com/c-ares/c-ares/releases/download/cares-1_19_1/c-ares-1.19.1.tar.gz"